Skip to content
Snippets Groups Projects
Commit af60cd96 authored by Christoph Harmel's avatar Christoph Harmel
Browse files

chore: added type hints in modules

parent c60004ba
No related branches found
No related tags found
1 merge request!22chore: added type hints in modules
1|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 481 bp
tgagcactcggtgccaagggcggggatacacagatggttggctgatacaa
2|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 495 bp
ctgaatcaggtgtaggttctttttacgtcgtttaaggagctacacggtat
3|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 193 bp
acttcagtactggaaggatctaggaaccattaatgcgagtgtggtgacgc
4|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 625 bp
acgtctggagcgtgggttgacccctgtacatggttctttccggatcctta
5|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 845 bp
agagcgtacggcgcgcatcgtataccctacgagggcggcgtgtggaggaa
6|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 703 bp
tgcagtcgatgtgctattcgttttaggcagtctacgcgcttagtaactcc
7|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 243 bp
actctttagaatgggtttcactaatagtacgtgcatacaatttcgtcaga
8|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 863 bp
attggcccggtccaggacagagccttatattgctactggtatgagaaccg
9|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 494 bp
aagcgaaactcctagaacttcccatcaggcaatcgtgtcccacgaagcac
10|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 86 bp
atcctagcgccaaagatttactgttatggggtcgacgaacactagccgat
11|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 360 bp
cgcctgagggtcctaaatctgacgtatgatcgaagagattggaaggtccc
12|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 140 bp
gaattcctggggatttactcacccccgaggcggacaagatttccagctgg
13|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 832 bp
aatactctcgttgaagcgtcggacagtaaagtgagagatttcggcccacg
14|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 296 bp
atcggggtgcgaaatcccctgagctggttgactacatacgtaaccacgtt
15|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 515 bp
accttcaatttgttcgcccgggacaagtagaaattactgtaaactaaact
16|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 820 bp
ccggctcaatcctgtagaaccgcgtacaacacacccaagctataccgcac
17|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 791 bp
attgttagggcctgtccggaaaagatcaacggaagatattcaccagcacc
18|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 328 bp
accgattacaggcagtcggccttgtccgctcgtatatccagggatgttcc
19|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp
ggagtggaaaattctgtagtccgttggcggcgaccgcaaaccagaataat
20|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 440 bp
atcttaaacagcccaatcggctcgccgaccaatttcccgcttcacagtac
21|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 840 bp
cataactcgtgagtggccctgtacaagtcattgcatcacaatccttgcaa
22|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 234 bp
caccgcgaaagtgactcagttttcccggtcttatcacggtcgttgtcgtc
23|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 917 bp
atcaagtgattacctggtaacccgccgctcttgcagtgttcaccctttgt
24|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 676 bp
cacacggcatcgcaaagcgagctatccagagatgatacatgtggttgaag
25|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 870 bp
ctgatcaccaatagcttgcgcttaacacacgcgccttacaattatatgac
26|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 751 bp
gggtgcgttatggggactaaagactgttactaccggtactccgccttata
27|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 574 bp
gtactgcaccttgcactgctatctacaatgccgagggtcgccctagtgct
28|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 169 bp
agctccctaaacaacacccgcgtaaaaccttcagttatggtgccgactaa
29|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 408 bp
tgcagtgatgcatcgataagaccgcatagttacctccttacaggtgacgc
30|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 52 bp
caaagcgattcgggttaacgcacttaagagttcgacgtaggttagtcccc
31|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 581 bp
tgctctgacgtgtaagcgccttcgataacgtctttgcagcgccccacaaa
32|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp
gcggaactacctctctaagaccgcacaacaagtgtagtagatgaagatca
33|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 297 bp
gccctcgcgccagcttacttttagaaaacatcgaccggtaagagatacct
34|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 573 bp
gcctaggggtcttgaccacagggagtacgagcattgatcattggagcagg
35|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 559 bp
gaaaaagtcgccccattcagttacaatcgtcttcagaagccagctcggtt
36|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 187 bp
ctaagtccttatctatgatgcatctttcgttactgcgacaatatccgaga
37|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 549 bp
ttcatatggggatttggaatcgggtttgtgcggaatatgcccacgagact
38|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 916 bp
gtggcctaccataaatcaatttgggttaacgctctttgatctacgcacta
39|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 848 bp
accccggtcgctttggccggtcgtagccctaatcaattctgttcgtatca
40|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 289 bp
agagcaaaagaaagtctgctccgcgtgacacacttgctcgttgtagtaac
41|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 642 bp
ctggggaatgaccgtaccgatctaattccccgtcgaaaaacttatgacgc
42|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 993 bp
gcaagaagccaaaaaccttgcaggaggtcatttaagtttacccgcgcata
43|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 473 bp
ggttgtccaggcgcgagcaagtagctgactcgctaatcttaacgagtatt
44|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 272 bp
gtagaacttgttccccatggacaatgctagttccgttaatgccaggtatt
45|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 860 bp
aaaagcatcactctaacgacgctaccgtctgaatagatcaagattgctat
46|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 884 bp
aagcctctacaggctctgcggtttggctttacttaacggtgagtcaggaa
47|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 888 bp
gcgccttgaagaggcgaggtctaaaggcaaaaatttagatccgccctatg
48|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 588 bp
catcaagatgggttacgtaggaccgagattcagtctctgggttagagccg
49|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 626 bp
taacctcagtctcgttcccccctcggtagttcggacccttattcgcttat
50|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 214 bp
taactgtcggtcactgctcatcccgactagttcggctcactagacttact
0: length 103 nt
ACGCCCTATGTCAGAGTGGTGTTAGTGCACAATTCATTCCGATATGCGAA
1: length 35 nt
ATAACGAGCGCGAGCCCTTAACGGGCAGGACTGCCGACTCACACCTTAAT
2: length 61 nt
GGTCTCATAAGCACTTCGGGTACATCCTCCTAAATGGGCCGCGACTTAGG
3: length 54 nt
TAGGTCTCGGATACGAGATATAACCGTAGCATGGGAGTGACACCCATGCC
4: length 64 nt
TCTGCGATACCCAATTTAGGGCATGGCATAAGGCCCGGCGGGTATTTCCG
5: length 85 nt
TTGTTAAGCGTGTTGGTAGCGCCCCCAAATTGTCCCAACTGGGCCACACC
6: length 53 nt
ATAGTACAAGTTAGACCTCATGCATTGAGCTGCCCGGTTCGGACCTTAAT
7: length 27 nt
TAAAATATTAAGGATAGCCGGAACGCGCTGGCGACTGACGACACTACACG
8: length 25 nt
TAGATTCCAGGGAATGACGTCACACCGTTCCGTATATCAGCGGCGCGGAA
9: length 52 nt
GGTGCACCTATTAACAGAGTGACTTTGGGGGAATCACTTATTCGCATATA
10: length 41 nt
AACGACAAGCTGAACGAGCTCTGGCGGCTACGCAGTGTTCGCGCAAGCGA
11: length 106 nt
GTAATAAACTAAACATCAGCTGAGGCGTCCGACCACTGCATCTTTTGTTG
12: length 5 nt
GTAATATGGTATAGCTCGACCCTCGTACAACGGGGGTCAGACAGCGTTGC
13: length 49 nt
TTACTTTGCACGCATGCATGGAGGCACCATGAAAGACGGGAAGCGGCTAA
14: length 22 nt
GCAAGGGGTTTGAGGGAGCAACGTAAGATCAGAGATAACAGTCTCAAGAG
15: length 109 nt
AATTGAAGTCAGCAAAGAATTGGGATTGCTGGAAGCAACACGGACACGTT
16: length 43 nt
GAACCATTCTAAACAGCACCACCCGCATCCCGCTTTGCGCAGCAGCCACT
17: length 85 nt
ATGTCTCACAGGGGAGGTCTCCAGGCTTCTACCAAGTTCCCAATTCCATG
18: length 16 nt
ATCTCATCAATTATGTTGTACACCTGCTCGTTCTAACTTGCAGACTGCAT
19: length 52 nt
GAGTATCGCAGTATACTGTTGGCGGACATTCCAGAATATTGGTATTCAGC
20: length 2 nt
ATATAGCTACCCCAGATTGTCTTAACGAGTCCCGCACCCCCCACCAAGAT
21: length 13 nt
GTATTTACCTGCCCACTAGTACAAGTCCCCGAACCTTAGCAATCCTAAGT
22: length 64 nt
TGGCCGTTTCAGAAGGATTCCTGCCCCTGTGATGAGTCACGTGGAGACAG
23: length 59 nt
CGAGGAGAGCTGTAAAATACTGCTGCCTGCTACATTGGGGCTTGGGTCTC
24: length 82 nt
GCACAGTAGCGAGTGACTCGGTTACTATGTGTTGCTGACTTTATATGTGC
25: length 6 nt
GAGTAAAGCTGAAACACCGAACCGGCAGTTAGAGCTAGTGGATTTTCTTA
26: length 87 nt
AGACATTTTCGGAACCAATGATCTTGTTGGAGTTGAGAGGCCGAACCAGG
27: length 136 nt
AACACGGTAATATCAGGTCGGCCTTAGTCACTTCGGAATTGGAGTTAGTA
28: length 68 nt
CACCGGTAGCTGAATGTAACGAATCTACGTTGACACGGTTAAAGGGATAT
29: length 84 nt
AGATTGTCATGGACTGACGGAGAACACCGTGCTTAAAGTGGAATAATGTG
30: length 51 nt
CGGCAAGGAGGAGGCTACTGACCAATCAGACAAACGTTCATGGCTAGAGA
31: length 51 nt
AGCAGCCGCTCAACTGTGACGTCGGCTGAGTCTACCAGCCCTCAGACTAC
32: length 73 nt
TCCGATCTTATAGTAGTGATCCTGATGGAACAATAACTGACCCAGTAAAG
33: length 58 nt
ACGTTTTATTGTACCTAGGTGTGGCACCATGAAAGGCAGGAGGCCCACCG
34: length 28 nt
AATCCGCAATCGTGCTACGCATTTTCCAGACAATATTTCGAGGTTACCGC
35: length 13 nt
AACACGAGGCCCTCTCGACGCTGCTATTTAACAGCGCGTGCCTATTGAAA
36: length 2 nt
ATACCGAAACTTACTAGTTGTGACAGCGGATTCCAAACTGCGGGTGAATA
37: length 71 nt
GTGAGGAGTACTCGGCACCGGGAACGTAGTAATGTGCTCTGATCCACTCT
38: length 46 nt
CGAACGTAAATAAGTCTGCTGCTGCCTCAGCAGAGGGATGGGAGGGGTCT
39: length 48 nt
CGCAAGAGTGTCGAGCTGGCAGACGCGTATTGCCTTTATATCCGTGAAAC
40: length 63 nt
TGACAAACATAAGTTTGTCGCCCGTTTCCACATAGTAGAATTCATTAGAA
41: length 11 nt
AGAAATAGTACGTAGTGTTGGCTGTGAATATTCGGAGATAAGGACTCATT
42: length 68 nt
GGTTCACAAATTGTCATAGATCATCGTGGTACGACGGGCGATCTTATTTC
43: length 7 nt
CGAGCGTGATGACGTAATGGAGTGAATAAGGAGTGCGAAATGGTGCGTTT
44: length 84 nt
TGGGACGATCATTCCCCCTGACCTTACGTACAATGCAATTGCGAGTGGCT
45: length 23 nt
TTGTACTTACGGCTTAAATTGCACAGAAGGGGTTCTCCGGAAGATAACTA
46: length 58 nt
GGCCCTTCGGTGCAGTCGGCCTAGGTATCAAGCTCGCCTGCCCGTGTTAT
47: length 50 nt
TATCGTGGTTATGTGCGGACCGCCTGATATTACAGCTGATGTGAGGGCAA
48: length 63 nt
TCTGCTAGGGATCCACTCCTGGGTCATTATTAAGCACTTACGGCCTGAGA
49: length 48 nt
GGAGAATTCGCGGCATAAGATCTCCATACGGGTAGTATGTGCCACTATTA
50: length 64 nt
AATGCGTACGGCCCAAAAACCTTCAGGATTACCATGAACCCGTAACTCAA
51: length 41 nt
AGTCATATAATGATTGAGTTCCCTCAACCCTCCGGACGTAAACACTCCGG
52: length 44 nt
TCCAGATGGCCTGGCAATCAGGTGTTGACGGGAACCGCACCCCTGCTGCT
53: length 62 nt
AGGTTGTCTGAATCACTCGGGAGACTTCCCCGCACGATATCGGCACCTGA
54: length 73 nt
CCGTACGGAACATCCTGCAGCTTCGCCCGGTCGAGAAGAAGGTAGAGATA
55: length 39 nt
TGCGCGGCGGACTCTGTCATTTATGACCTGCTTGAACGATGAGTGGGCCA
56: length 88 nt
CGATCCATAGGGCACGCTAGTTTGCATTTTAAGGAGTGCCAATTACGAGG
57: length 117 nt
TATTGCGTCCGTTGCCTGCTCCATCCGTTGCACTATGCCATAAGGCAACA
58: length 67 nt
TTAACGCACGATCAACGGTTCATAATCTGCGGTGATCCCAGAGCTCTACA
59: length 84 nt
CTGACCCTCGATGTGTTTAATAGTTGTATTTCGGCCCCAGTAGGCTCGAC
60: length 71 nt
CTGTCAGATGCGCATGTTCGTGGTCCATTGTCAGCCAAATTAAAGGTTGC
61: length 102 nt
TACATGCTCATATGAACTGACATTGAGTGGAAGGTAGCGACCGCGAGGTT
62: length 66 nt
ACATGATCTTTGTCGAATACTCAAAATGCACAGTGCCTAACTATTTGGGT
63: length 22 nt
GTCACGACACGCATCTACACTAAACAGCACCACATGTTAGAGATGAGATC
64: length 46 nt
CGTGGTCGTGAAGACCCACTATATTAGCAAGAGTCGCGATAGATGGGTGT
65: length 78 nt
CTAGGTAATTTTGAGAGACACCCCGACGCACGGATGTTCCGTACAAAGAT
66: length 32 nt
CAGTCTGAATGTATCAACTAAGAGAATCGTACGAAAATAGACCTAATCGA
67: length 42 nt
GCTCCGCACACTAGATAGCTGATGCCCACATACTCATTCATTATTCGATG
68: length 49 nt
GTCTGAACCAGACACGCACTACGGCTAGCACCTATCCTTATCCTAAAACG
69: length 41 nt
GAGTTACGACCGGACGGGTTGTTAGGTCACCCCGGAATCAGACGATTTAC
70: length 29 nt
GTAGGCCAACGCGAACTACTCCTCCCGCCCACAGTCTAGACTAAGCGTTG
71: length 94 nt
GGTTTAGACTGTCTGCAACTATGTTACTGGATATACCTAATGACAGGGGT
72: length 48 nt
TAAGAATGTTATTGGGCGGTAGCTTACTCTATTGGAATTGAGAAAATGTA
73: length 63 nt
CTAGTATGTTTTATTGGATGCTTGAGGTAGGAATAATCACATCGGGCCGA
74: length 70 nt
ACAAACCTCCTCCCCGGCGACGGGCAACGCCCGCATTGCAACATGCAAGA
75: length 58 nt
GGCTCCATTTCGCTACCCTCTCCCCTAAATCAGGTCCGCGGAGTTGTCGT
76: length 98 nt
ACATACCTATGCCTCAGATCGGTTCGGCTCTTGAGGCGACACCCGTGATA
77: length 34 nt
TTAGAAAGGAACTCATTTTAAGCAAGTTATGCCAGGTTAGCCCGCCCTGG
78: length 48 nt
AAGGCTCGCCGTGACCAGTAGTGCGCAACCTTTCAAGCCGTGTAATATTT
79: length 51 nt
CCTTAAGGGTGGTTTGCACCGTAAAGACCCTCTCTTCCACTTTCCCCGAA
80: length 30 nt
GCCTCGACGTGCCGGACGATGGGCGCTGATCGAAAGTCCATTGTCCCCCT
81: length 14 nt
AGGGCAGTGTACACACACGGCAAGGGTTGCGATGGGTAGAGGCTAAGACG
82: length 39 nt
AATGGGACTTAGTAAACAGGCTGGCCGCCTAGACTGTGTGTGAATGCCCA
83: length 29 nt
AGCTTGGACTTAGAGAAAATCAGTCTACAAGACATAGGCTTTAAAGAGGA
84: length 15 nt
ACGGAAAGTAAGTCTCTCGTCCCTGGTTAAAGGGTCAGTCCCTCCCACCT
85: length 20 nt
CCCAGAGCAAACTGGACCTCCCACCGGCAGATCATCGTTTATTATTATAC
86: length 64 nt
CGTGGCCAGTACATGCTGATGATCCCTATTACAGACCTCGCGTGTTGAGA
87: length 52 nt
GTTAGGCTGTACTGAATACCATGAGCATGTGGGTAGGTTGTACTGGAACC
88: length 60 nt
CGCCGGGCTCCCCGAAGTATAGAGCCAGCGGTTAAATAACTTATCTGATC
89: length 32 nt
CTTGTACGTCGAGTATGAGAGGGGTTACACTTTCTGCTTGCTAAGTTCAG
90: length 14 nt
AAACAGATTAATAACGTAATACCCCCGCTCTACGCCCCTGCATTACGGTT
91: length 21 nt
GTAGTTCGTAGCAGGGGATTGTCATAAATCAGCGCCTGATGGGAAGCGTT
92: length 47 nt
TGCCCAGATGTAACACATCCCGGCCCATTCTGGTAACTCCGTTTCTGGGT
93: length 47 nt
AATTAAGGGGTTGTCAAACCGGCCACTTTTGAACAATGGGCTTCGCCGCC
94: length 42 nt
TTCAACCTTTACGATGCAAACATAGACAAACTTGGACGTATTCGGACCCT
95: length 35 nt
GGCGCTTAGTGGAATGCCCCCCCGTATGCAGCATCGAGTATTCCGTAGCG
96: length 54 nt
TTGCTGATCTTTCGAATTTGGATCTGGGATTTTGCCAGATAGCGCCCGTA
97: length 34 nt
TACGTAGATATTTTCAGGAGGCACTGAACAGCCCAGAGCCTCCATCGGGC
98: length 15 nt
ATTGCGCAGGTATGGCTCAGAGACACAGTATACTATTACCTTCCCGCAAC
99: length 42 nt
CGATCTGGCATCGGCATCCGAGTAGCCCCTACGTATGGCTTTGCCCAACA
import logging
LOG = logging.getLogger(__name__)
def read_in_fasta(file_path: str) -> dict[str, str]:
    """
    Read a FASTA file into a dictionary.

    Args:
        file_path (str): path of the FASTA file to read.

    Returns:
        dict: maps each definition line (with every '>' removed) to its
            sequence; sequence lines belonging to the same definition line
            are stripped and concatenated.

    Raises:
        ValueError: if sequence data appears before the first definition line.
    """
    LOG.info("Reading in FASTA files from destination.")
    chunks: dict[str, list[str]] = {}
    def_line = None
    # The context manager closes the file even if parsing raises.
    with open(file_path) as fasta:
        for raw_line in fasta:
            line = raw_line.strip()
            if raw_line.startswith('>'):
                def_line = line.replace('>', '')
            elif def_line is None:
                # Blank lines ahead of the first header are harmless; real
                # data without a header has no key to be stored under.
                if line:
                    raise ValueError(
                        "Sequence data found before the first definition line."
                    )
            else:
                chunks.setdefault(def_line, []).append(line)
    return {key: ''.join(parts) for key, parts in chunks.items()}
def read_sequence(seq, read_length):
def read_sequence(seq:str, read_length:int) -> str:
"""
This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is
smaller then the requested length or cuts the sequence if its longer.
smaller than the requested length or cuts the sequence if it's longer.
Args:
seq (str): the sequence to read
......@@ -40,8 +39,8 @@ def read_sequence(seq, read_length):
"""
from random import choice
bases = ["A", "T", "C", "G"]
sequenced = ''
bases: list[str] = ["A", "T", "C", "G"]
sequenced: str = ''
if read_length > len(seq):
for nt in range(len(seq)):
sequenced += seq[nt]
......@@ -53,7 +52,7 @@ def read_sequence(seq, read_length):
return sequenced
def simulate_sequencing(sequences: dict[str, str], read_length: int) -> dict[str, str]:
    """
    Simulate sequencing of every given sequence.

    Args:
        sequences (dict): sequences to read, keyed by identifier.
        read_length (int): read length handed to read_sequence for each sequence.

    Returns:
        dict: one read per input sequence, stored under the input's key.
    """
    LOG.info("Sequencing in progress....")
    results: dict[str, str] = {
        key: read_sequence(seq, read_length=read_length)
        for key, seq in sequences.items()
    }
    LOG.info("Sequencing was successfully executed.")
    return results
def generate_sequences(n: int, mean: int, sd: float) -> dict[str, str]:
    """
    Generate random nucleotide sequences.

    Args:
        n (int): number of sequences to generate.
        mean (int): mean length of a sequence (gaussian distribution).
        sd (float): standard deviation of the sequence length (gaussian distribution).

    Returns:
        dict: n sequences, each keyed by "<index>: length <length> nt".
    """
    from random import choice, gauss
    LOG.info("Generating random sequences.")
    bases: list[str] = ["A", "T", "C", "G"]
    sequences: dict[str, str] = {}
    for i in range(n):
        # A gaussian draw can be negative; abs() folds it back into a
        # usable length.
        length = abs(round(gauss(mean, sd)))
        seq = "".join(choice(bases) for _ in range(length))
        sequences[f"{i}: length {len(seq)} nt"] = seq
    return sequences
def write_fasta(sequences: dict[str,str], file_path: str):
"""
Takes a dictionary and writes it to a fasta file.
Must specify the filename when calling the function.
......@@ -116,17 +114,17 @@ def write_fasta(sequences, file_path):
class ReadSequencer:
    """Hold a set of sequences together with the reads simulated from them."""

    def __init__(self) -> None:
        """Start with no sequences and no reads."""
        # The attribute types are declared once here; the other methods only
        # rebind the attributes, so they carry no annotations of their own.
        self.sequences: dict[str, str] = {}
        self.reads: dict[str, str] = {}

    def add_random_sequences(self, n: int, mean: int, sd: int) -> None:
        """Replace the stored sequences with the result of generate_sequences(n, mean, sd)."""
        self.sequences = generate_sequences(n, mean, sd)

    def read_fasta(self, input_file: str) -> None:
        """Replace the stored sequences with those read from ``input_file``."""
        self.sequences = read_in_fasta(input_file)

    def run_sequencing(self, read_length: int) -> None:
        """Simulate sequencing of the stored sequences and keep the reads."""
        self.reads = simulate_sequencing(self.sequences, read_length)

    def write_fasta(self, output_file_path: str) -> None:
        """Write the stored reads to ``output_file_path``."""
        # Inside the method body this name resolves to the module-level
        # write_fasta function, not to this method.
        write_fasta(self.reads, output_file_path)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment