Skip to content
Snippets Groups Projects
Commit af60cd96 authored by Christoph Harmel's avatar Christoph Harmel
Browse files

chore: added type hints in modules

parent c60004ba
No related branches found
No related tags found
1 merge request: !22 chore: added type hints in modules
1|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 481 bp 0: length 103 nt
tgagcactcggtgccaagggcggggatacacagatggttggctgatacaa ACGCCCTATGTCAGAGTGGTGTTAGTGCACAATTCATTCCGATATGCGAA
2|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 495 bp 1: length 35 nt
ctgaatcaggtgtaggttctttttacgtcgtttaaggagctacacggtat ATAACGAGCGCGAGCCCTTAACGGGCAGGACTGCCGACTCACACCTTAAT
3|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 193 bp 2: length 61 nt
acttcagtactggaaggatctaggaaccattaatgcgagtgtggtgacgc GGTCTCATAAGCACTTCGGGTACATCCTCCTAAATGGGCCGCGACTTAGG
4|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 625 bp 3: length 54 nt
acgtctggagcgtgggttgacccctgtacatggttctttccggatcctta TAGGTCTCGGATACGAGATATAACCGTAGCATGGGAGTGACACCCATGCC
5|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 845 bp 4: length 64 nt
agagcgtacggcgcgcatcgtataccctacgagggcggcgtgtggaggaa TCTGCGATACCCAATTTAGGGCATGGCATAAGGCCCGGCGGGTATTTCCG
6|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 703 bp 5: length 85 nt
tgcagtcgatgtgctattcgttttaggcagtctacgcgcttagtaactcc TTGTTAAGCGTGTTGGTAGCGCCCCCAAATTGTCCCAACTGGGCCACACC
7|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 243 bp 6: length 53 nt
actctttagaatgggtttcactaatagtacgtgcatacaatttcgtcaga ATAGTACAAGTTAGACCTCATGCATTGAGCTGCCCGGTTCGGACCTTAAT
8|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 863 bp 7: length 27 nt
attggcccggtccaggacagagccttatattgctactggtatgagaaccg TAAAATATTAAGGATAGCCGGAACGCGCTGGCGACTGACGACACTACACG
9|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 494 bp 8: length 25 nt
aagcgaaactcctagaacttcccatcaggcaatcgtgtcccacgaagcac TAGATTCCAGGGAATGACGTCACACCGTTCCGTATATCAGCGGCGCGGAA
10|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 86 bp 9: length 52 nt
atcctagcgccaaagatttactgttatggggtcgacgaacactagccgat GGTGCACCTATTAACAGAGTGACTTTGGGGGAATCACTTATTCGCATATA
11|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 360 bp 10: length 41 nt
cgcctgagggtcctaaatctgacgtatgatcgaagagattggaaggtccc AACGACAAGCTGAACGAGCTCTGGCGGCTACGCAGTGTTCGCGCAAGCGA
12|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 140 bp 11: length 106 nt
gaattcctggggatttactcacccccgaggcggacaagatttccagctgg GTAATAAACTAAACATCAGCTGAGGCGTCCGACCACTGCATCTTTTGTTG
13|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 832 bp 12: length 5 nt
aatactctcgttgaagcgtcggacagtaaagtgagagatttcggcccacg GTAATATGGTATAGCTCGACCCTCGTACAACGGGGGTCAGACAGCGTTGC
14|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 296 bp 13: length 49 nt
atcggggtgcgaaatcccctgagctggttgactacatacgtaaccacgtt TTACTTTGCACGCATGCATGGAGGCACCATGAAAGACGGGAAGCGGCTAA
15|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 515 bp 14: length 22 nt
accttcaatttgttcgcccgggacaagtagaaattactgtaaactaaact GCAAGGGGTTTGAGGGAGCAACGTAAGATCAGAGATAACAGTCTCAAGAG
16|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 820 bp 15: length 109 nt
ccggctcaatcctgtagaaccgcgtacaacacacccaagctataccgcac AATTGAAGTCAGCAAAGAATTGGGATTGCTGGAAGCAACACGGACACGTT
17|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 791 bp 16: length 43 nt
attgttagggcctgtccggaaaagatcaacggaagatattcaccagcacc GAACCATTCTAAACAGCACCACCCGCATCCCGCTTTGCGCAGCAGCCACT
18|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 328 bp 17: length 85 nt
accgattacaggcagtcggccttgtccgctcgtatatccagggatgttcc ATGTCTCACAGGGGAGGTCTCCAGGCTTCTACCAAGTTCCCAATTCCATG
19|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp 18: length 16 nt
ggagtggaaaattctgtagtccgttggcggcgaccgcaaaccagaataat ATCTCATCAATTATGTTGTACACCTGCTCGTTCTAACTTGCAGACTGCAT
20|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 440 bp 19: length 52 nt
atcttaaacagcccaatcggctcgccgaccaatttcccgcttcacagtac GAGTATCGCAGTATACTGTTGGCGGACATTCCAGAATATTGGTATTCAGC
21|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 840 bp 20: length 2 nt
cataactcgtgagtggccctgtacaagtcattgcatcacaatccttgcaa ATATAGCTACCCCAGATTGTCTTAACGAGTCCCGCACCCCCCACCAAGAT
22|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 234 bp 21: length 13 nt
caccgcgaaagtgactcagttttcccggtcttatcacggtcgttgtcgtc GTATTTACCTGCCCACTAGTACAAGTCCCCGAACCTTAGCAATCCTAAGT
23|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 917 bp 22: length 64 nt
atcaagtgattacctggtaacccgccgctcttgcagtgttcaccctttgt TGGCCGTTTCAGAAGGATTCCTGCCCCTGTGATGAGTCACGTGGAGACAG
24|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 676 bp 23: length 59 nt
cacacggcatcgcaaagcgagctatccagagatgatacatgtggttgaag CGAGGAGAGCTGTAAAATACTGCTGCCTGCTACATTGGGGCTTGGGTCTC
25|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 870 bp 24: length 82 nt
ctgatcaccaatagcttgcgcttaacacacgcgccttacaattatatgac GCACAGTAGCGAGTGACTCGGTTACTATGTGTTGCTGACTTTATATGTGC
26|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 751 bp 25: length 6 nt
gggtgcgttatggggactaaagactgttactaccggtactccgccttata GAGTAAAGCTGAAACACCGAACCGGCAGTTAGAGCTAGTGGATTTTCTTA
27|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 574 bp 26: length 87 nt
gtactgcaccttgcactgctatctacaatgccgagggtcgccctagtgct AGACATTTTCGGAACCAATGATCTTGTTGGAGTTGAGAGGCCGAACCAGG
28|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 169 bp 27: length 136 nt
agctccctaaacaacacccgcgtaaaaccttcagttatggtgccgactaa AACACGGTAATATCAGGTCGGCCTTAGTCACTTCGGAATTGGAGTTAGTA
29|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 408 bp 28: length 68 nt
tgcagtgatgcatcgataagaccgcatagttacctccttacaggtgacgc CACCGGTAGCTGAATGTAACGAATCTACGTTGACACGGTTAAAGGGATAT
30|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 52 bp 29: length 84 nt
caaagcgattcgggttaacgcacttaagagttcgacgtaggttagtcccc AGATTGTCATGGACTGACGGAGAACACCGTGCTTAAAGTGGAATAATGTG
31|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 581 bp 30: length 51 nt
tgctctgacgtgtaagcgccttcgataacgtctttgcagcgccccacaaa CGGCAAGGAGGAGGCTACTGACCAATCAGACAAACGTTCATGGCTAGAGA
32|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp 31: length 51 nt
gcggaactacctctctaagaccgcacaacaagtgtagtagatgaagatca AGCAGCCGCTCAACTGTGACGTCGGCTGAGTCTACCAGCCCTCAGACTAC
33|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 297 bp 32: length 73 nt
gccctcgcgccagcttacttttagaaaacatcgaccggtaagagatacct TCCGATCTTATAGTAGTGATCCTGATGGAACAATAACTGACCCAGTAAAG
34|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 573 bp 33: length 58 nt
gcctaggggtcttgaccacagggagtacgagcattgatcattggagcagg ACGTTTTATTGTACCTAGGTGTGGCACCATGAAAGGCAGGAGGCCCACCG
35|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 559 bp 34: length 28 nt
gaaaaagtcgccccattcagttacaatcgtcttcagaagccagctcggtt AATCCGCAATCGTGCTACGCATTTTCCAGACAATATTTCGAGGTTACCGC
36|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 187 bp 35: length 13 nt
ctaagtccttatctatgatgcatctttcgttactgcgacaatatccgaga AACACGAGGCCCTCTCGACGCTGCTATTTAACAGCGCGTGCCTATTGAAA
37|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 549 bp 36: length 2 nt
ttcatatggggatttggaatcgggtttgtgcggaatatgcccacgagact ATACCGAAACTTACTAGTTGTGACAGCGGATTCCAAACTGCGGGTGAATA
38|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 916 bp 37: length 71 nt
gtggcctaccataaatcaatttgggttaacgctctttgatctacgcacta GTGAGGAGTACTCGGCACCGGGAACGTAGTAATGTGCTCTGATCCACTCT
39|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 848 bp 38: length 46 nt
accccggtcgctttggccggtcgtagccctaatcaattctgttcgtatca CGAACGTAAATAAGTCTGCTGCTGCCTCAGCAGAGGGATGGGAGGGGTCT
40|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 289 bp 39: length 48 nt
agagcaaaagaaagtctgctccgcgtgacacacttgctcgttgtagtaac CGCAAGAGTGTCGAGCTGGCAGACGCGTATTGCCTTTATATCCGTGAAAC
41|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 642 bp 40: length 63 nt
ctggggaatgaccgtaccgatctaattccccgtcgaaaaacttatgacgc TGACAAACATAAGTTTGTCGCCCGTTTCCACATAGTAGAATTCATTAGAA
42|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 993 bp 41: length 11 nt
gcaagaagccaaaaaccttgcaggaggtcatttaagtttacccgcgcata AGAAATAGTACGTAGTGTTGGCTGTGAATATTCGGAGATAAGGACTCATT
43|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 473 bp 42: length 68 nt
ggttgtccaggcgcgagcaagtagctgactcgctaatcttaacgagtatt GGTTCACAAATTGTCATAGATCATCGTGGTACGACGGGCGATCTTATTTC
44|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 272 bp 43: length 7 nt
gtagaacttgttccccatggacaatgctagttccgttaatgccaggtatt CGAGCGTGATGACGTAATGGAGTGAATAAGGAGTGCGAAATGGTGCGTTT
45|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 860 bp 44: length 84 nt
aaaagcatcactctaacgacgctaccgtctgaatagatcaagattgctat TGGGACGATCATTCCCCCTGACCTTACGTACAATGCAATTGCGAGTGGCT
46|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 884 bp 45: length 23 nt
aagcctctacaggctctgcggtttggctttacttaacggtgagtcaggaa TTGTACTTACGGCTTAAATTGCACAGAAGGGGTTCTCCGGAAGATAACTA
47|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 888 bp 46: length 58 nt
gcgccttgaagaggcgaggtctaaaggcaaaaatttagatccgccctatg GGCCCTTCGGTGCAGTCGGCCTAGGTATCAAGCTCGCCTGCCCGTGTTAT
48|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 588 bp 47: length 50 nt
catcaagatgggttacgtaggaccgagattcagtctctgggttagagccg TATCGTGGTTATGTGCGGACCGCCTGATATTACAGCTGATGTGAGGGCAA
49|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 626 bp 48: length 63 nt
taacctcagtctcgttcccccctcggtagttcggacccttattcgcttat TCTGCTAGGGATCCACTCCTGGGTCATTATTAAGCACTTACGGCCTGAGA
50|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 214 bp 49: length 48 nt
taactgtcggtcactgctcatcccgactagttcggctcactagacttact GGAGAATTCGCGGCATAAGATCTCCATACGGGTAGTATGTGCCACTATTA
50: length 64 nt
AATGCGTACGGCCCAAAAACCTTCAGGATTACCATGAACCCGTAACTCAA
51: length 41 nt
AGTCATATAATGATTGAGTTCCCTCAACCCTCCGGACGTAAACACTCCGG
52: length 44 nt
TCCAGATGGCCTGGCAATCAGGTGTTGACGGGAACCGCACCCCTGCTGCT
53: length 62 nt
AGGTTGTCTGAATCACTCGGGAGACTTCCCCGCACGATATCGGCACCTGA
54: length 73 nt
CCGTACGGAACATCCTGCAGCTTCGCCCGGTCGAGAAGAAGGTAGAGATA
55: length 39 nt
TGCGCGGCGGACTCTGTCATTTATGACCTGCTTGAACGATGAGTGGGCCA
56: length 88 nt
CGATCCATAGGGCACGCTAGTTTGCATTTTAAGGAGTGCCAATTACGAGG
57: length 117 nt
TATTGCGTCCGTTGCCTGCTCCATCCGTTGCACTATGCCATAAGGCAACA
58: length 67 nt
TTAACGCACGATCAACGGTTCATAATCTGCGGTGATCCCAGAGCTCTACA
59: length 84 nt
CTGACCCTCGATGTGTTTAATAGTTGTATTTCGGCCCCAGTAGGCTCGAC
60: length 71 nt
CTGTCAGATGCGCATGTTCGTGGTCCATTGTCAGCCAAATTAAAGGTTGC
61: length 102 nt
TACATGCTCATATGAACTGACATTGAGTGGAAGGTAGCGACCGCGAGGTT
62: length 66 nt
ACATGATCTTTGTCGAATACTCAAAATGCACAGTGCCTAACTATTTGGGT
63: length 22 nt
GTCACGACACGCATCTACACTAAACAGCACCACATGTTAGAGATGAGATC
64: length 46 nt
CGTGGTCGTGAAGACCCACTATATTAGCAAGAGTCGCGATAGATGGGTGT
65: length 78 nt
CTAGGTAATTTTGAGAGACACCCCGACGCACGGATGTTCCGTACAAAGAT
66: length 32 nt
CAGTCTGAATGTATCAACTAAGAGAATCGTACGAAAATAGACCTAATCGA
67: length 42 nt
GCTCCGCACACTAGATAGCTGATGCCCACATACTCATTCATTATTCGATG
68: length 49 nt
GTCTGAACCAGACACGCACTACGGCTAGCACCTATCCTTATCCTAAAACG
69: length 41 nt
GAGTTACGACCGGACGGGTTGTTAGGTCACCCCGGAATCAGACGATTTAC
70: length 29 nt
GTAGGCCAACGCGAACTACTCCTCCCGCCCACAGTCTAGACTAAGCGTTG
71: length 94 nt
GGTTTAGACTGTCTGCAACTATGTTACTGGATATACCTAATGACAGGGGT
72: length 48 nt
TAAGAATGTTATTGGGCGGTAGCTTACTCTATTGGAATTGAGAAAATGTA
73: length 63 nt
CTAGTATGTTTTATTGGATGCTTGAGGTAGGAATAATCACATCGGGCCGA
74: length 70 nt
ACAAACCTCCTCCCCGGCGACGGGCAACGCCCGCATTGCAACATGCAAGA
75: length 58 nt
GGCTCCATTTCGCTACCCTCTCCCCTAAATCAGGTCCGCGGAGTTGTCGT
76: length 98 nt
ACATACCTATGCCTCAGATCGGTTCGGCTCTTGAGGCGACACCCGTGATA
77: length 34 nt
TTAGAAAGGAACTCATTTTAAGCAAGTTATGCCAGGTTAGCCCGCCCTGG
78: length 48 nt
AAGGCTCGCCGTGACCAGTAGTGCGCAACCTTTCAAGCCGTGTAATATTT
79: length 51 nt
CCTTAAGGGTGGTTTGCACCGTAAAGACCCTCTCTTCCACTTTCCCCGAA
80: length 30 nt
GCCTCGACGTGCCGGACGATGGGCGCTGATCGAAAGTCCATTGTCCCCCT
81: length 14 nt
AGGGCAGTGTACACACACGGCAAGGGTTGCGATGGGTAGAGGCTAAGACG
82: length 39 nt
AATGGGACTTAGTAAACAGGCTGGCCGCCTAGACTGTGTGTGAATGCCCA
83: length 29 nt
AGCTTGGACTTAGAGAAAATCAGTCTACAAGACATAGGCTTTAAAGAGGA
84: length 15 nt
ACGGAAAGTAAGTCTCTCGTCCCTGGTTAAAGGGTCAGTCCCTCCCACCT
85: length 20 nt
CCCAGAGCAAACTGGACCTCCCACCGGCAGATCATCGTTTATTATTATAC
86: length 64 nt
CGTGGCCAGTACATGCTGATGATCCCTATTACAGACCTCGCGTGTTGAGA
87: length 52 nt
GTTAGGCTGTACTGAATACCATGAGCATGTGGGTAGGTTGTACTGGAACC
88: length 60 nt
CGCCGGGCTCCCCGAAGTATAGAGCCAGCGGTTAAATAACTTATCTGATC
89: length 32 nt
CTTGTACGTCGAGTATGAGAGGGGTTACACTTTCTGCTTGCTAAGTTCAG
90: length 14 nt
AAACAGATTAATAACGTAATACCCCCGCTCTACGCCCCTGCATTACGGTT
91: length 21 nt
GTAGTTCGTAGCAGGGGATTGTCATAAATCAGCGCCTGATGGGAAGCGTT
92: length 47 nt
TGCCCAGATGTAACACATCCCGGCCCATTCTGGTAACTCCGTTTCTGGGT
93: length 47 nt
AATTAAGGGGTTGTCAAACCGGCCACTTTTGAACAATGGGCTTCGCCGCC
94: length 42 nt
TTCAACCTTTACGATGCAAACATAGACAAACTTGGACGTATTCGGACCCT
95: length 35 nt
GGCGCTTAGTGGAATGCCCCCCCGTATGCAGCATCGAGTATTCCGTAGCG
96: length 54 nt
TTGCTGATCTTTCGAATTTGGATCTGGGATTTTGCCAGATAGCGCCCGTA
97: length 34 nt
TACGTAGATATTTTCAGGAGGCACTGAACAGCCCAGAGCCTCCATCGGGC
98: length 15 nt
ATTGCGCAGGTATGGCTCAGAGACACAGTATACTATTACCTTCCCGCAAC
99: length 42 nt
CGATCTGGCATCGGCATCCGAGTAGCCCCTACGTATGGCTTTGCCCAACA
import logging import logging
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
def read_in_fasta(file_path: str) -> dict[str, str]:
    """
    This function reads in FASTA files.

    Lines starting with '>' are definition lines; every other line is
    stripped and appended to the sequence of the most recent definition line.
    A definition line only becomes a key once at least one following line
    has been read for it.

    Args:
        file_path (str): Path of the FASTA file to read.

    Returns:
        Dict: It returns a dictionary with sequences, keyed by the
        definition line with all '>' characters removed.

    Raises:
        ValueError: If sequence data appears before the first definition line.
    """
    LOG.info("Reading in FASTA files from destination.")
    chunks: dict[str, list[str]] = {}
    def_line = None
    # The context manager closes the file even if parsing raises.
    with open(file_path) as f:
        for line in f:
            if line.startswith('>'):
                def_line = line.strip().replace('>', '')
                continue
            content = line.strip()
            if def_line is None:
                # Leading blank lines are harmless; real data without a
                # header cannot be assigned to any sequence.
                if content:
                    raise ValueError(
                        "Sequence data found before the first FASTA definition line."
                    )
                continue
            chunks.setdefault(def_line, []).append(content)
    # Join once per record instead of growing a string line by line.
    return {name: ''.join(parts) for name, parts in chunks.items()}
def read_sequence(seq, read_length): def read_sequence(seq:str, read_length:int) -> str:
""" """
This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is
smaller then the requested length or cuts the sequence if its longer. smaller than the requested length or cuts the sequence if its longer.
Args: Args:
seq (str): the sequence to read seq (str): the sequence to read
...@@ -40,8 +39,8 @@ def read_sequence(seq, read_length): ...@@ -40,8 +39,8 @@ def read_sequence(seq, read_length):
""" """
from random import choice from random import choice
bases = ["A", "T", "C", "G"] bases: list[str] = ["A", "T", "C", "G"]
sequenced = '' sequenced: str = ''
if read_length > len(seq): if read_length > len(seq):
for nt in range(len(seq)): for nt in range(len(seq)):
sequenced += seq[nt] sequenced += seq[nt]
...@@ -53,7 +52,7 @@ def read_sequence(seq, read_length): ...@@ -53,7 +52,7 @@ def read_sequence(seq, read_length):
return sequenced return sequenced
def simulate_sequencing(sequences: dict[str, str], read_length: int) -> dict[str, str]:
    """
    Simulates sequencing.

    Every sequence is passed to read_sequence() together with the requested
    read length; the result is stored under the same key as the input.

    Args:
        sequences (dict): sequences to read, keyed by their identifier.
        read_length (int): read length handed to read_sequence().

    Returns:
        dict: of n sequences as values
    """
    LOG.info("Sequencing in progress....")
    # Iterate key/value pairs directly; the positional index was never used.
    results: dict[str, str] = {
        key: read_sequence(seq, read_length=read_length)
        for key, seq in sequences.items()
    }
    LOG.info("Sequencing was successfully executed.")
    return results
def generate_sequences(n: int, mean: int, sd: float) -> dict[str, str]:
    """
    Generates random sequences.

    The length of each sequence is drawn from a gaussian distribution,
    rounded to the nearest integer and made non-negative with abs(). Every
    position is then filled with a base picked at random from A, T, C and G.

    Args:
        n (int): Amount of sequences to generate.
        mean (int): mean length of sequence (gaussian distribution).
        sd (float): standard deviation of length of sequence (gaussian distribution).

    Returns:
        dict: of n sequences, keyed by "<index>: length <length> nt"
    """
    from random import choice, gauss

    LOG.info("Generating random sequences.")
    # The alphabet never changes, so build it once outside the loop.
    bases: list[str] = ["A", "T", "C", "G"]
    sequences: dict[str, str] = {}
    for i in range(n):
        # Draw the length first, then the bases, to keep the order in which
        # random numbers are consumed.
        length = abs(round(gauss(mean, sd)))
        seq = "".join(choice(bases) for _ in range(length))
        sequences[f"{i}: length {len(seq)} nt"] = seq
    return sequences
def write_fasta(sequences, file_path): def write_fasta(sequences: dict[str,str], file_path: str):
""" """
Takes a dictionary and writes it to a fasta file. Takes a dictionary and writes it to a fasta file.
Must specify the filename when calling the function. Must specify the filename when calling the function.
...@@ -116,17 +114,17 @@ def write_fasta(sequences, file_path): ...@@ -116,17 +114,17 @@ def write_fasta(sequences, file_path):
class ReadSequencer:
    """
    Bundles loading or generating sequences, simulated sequencing and FASTA output.

    The work itself is delegated to the module-level functions
    generate_sequences, read_in_fasta, simulate_sequencing and write_fasta.
    """

    def __init__(self) -> None:
        # Input sequences, keyed by their identifier; empty until loaded or generated.
        self.sequences: dict[str, str] = {}
        # Result of the most recent run_sequencing() call.
        self.reads: dict[str, str] = {}

    def add_random_sequences(self, n: int, mean: int, sd: float) -> None:
        """Replace the stored sequences with the output of generate_sequences(n, mean, sd)."""
        self.sequences = generate_sequences(n, mean, sd)

    def read_fasta(self, input_file: str) -> None:
        """Replace the stored sequences with those read from input_file."""
        self.sequences = read_in_fasta(input_file)

    def run_sequencing(self, read_length: int) -> None:
        """Run simulate_sequencing on the stored sequences and keep the result in self.reads."""
        self.reads = simulate_sequencing(self.sequences, read_length)

    def write_fasta(self, output_file_path: str) -> None:
        """Pass the stored reads and output_file_path to the module-level write_fasta function."""
        write_fasta(self.reads, output_file_path)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment