From af60cd960dc939b709a6ef337ab243e4eef5a5e0 Mon Sep 17 00:00:00 2001
From: Christoph Harmel <christoph.harmel@unibas.ch>
Date: Tue, 15 Nov 2022 14:25:57 +0100
Subject: [PATCH] chore: added type hints in modules

---
 .../fasta_testfile/result.fasta               | 300 ++++++++++++------
 read_sequencer_package/modules.py             |  68 ++--
 2 files changed, 233 insertions(+), 135 deletions(-)

diff --git a/read_sequencer_package/fasta_testfile/result.fasta b/read_sequencer_package/fasta_testfile/result.fasta
index 1aedc2c..32258d6 100644
--- a/read_sequencer_package/fasta_testfile/result.fasta
+++ b/read_sequencer_package/fasta_testfile/result.fasta
@@ -1,100 +1,200 @@
-1|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 481 bp
-tgagcactcggtgccaagggcggggatacacagatggttggctgatacaa
-2|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 495 bp
-ctgaatcaggtgtaggttctttttacgtcgtttaaggagctacacggtat
-3|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 193 bp
-acttcagtactggaaggatctaggaaccattaatgcgagtgtggtgacgc
-4|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 625 bp
-acgtctggagcgtgggttgacccctgtacatggttctttccggatcctta
-5|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 845 bp
-agagcgtacggcgcgcatcgtataccctacgagggcggcgtgtggaggaa
-6|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 703 bp
-tgcagtcgatgtgctattcgttttaggcagtctacgcgcttagtaactcc
-7|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 243 bp
-actctttagaatgggtttcactaatagtacgtgcatacaatttcgtcaga
-8|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 863 bp
-attggcccggtccaggacagagccttatattgctactggtatgagaaccg
-9|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 494 bp
-aagcgaaactcctagaacttcccatcaggcaatcgtgtcccacgaagcac
-10|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 86 bp
-atcctagcgccaaagatttactgttatggggtcgacgaacactagccgat
-11|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 360 bp
-cgcctgagggtcctaaatctgacgtatgatcgaagagattggaaggtccc
-12|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 140 bp
-gaattcctggggatttactcacccccgaggcggacaagatttccagctgg
-13|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 832 bp
-aatactctcgttgaagcgtcggacagtaaagtgagagatttcggcccacg
-14|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 296 bp
-atcggggtgcgaaatcccctgagctggttgactacatacgtaaccacgtt
-15|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 515 bp
-accttcaatttgttcgcccgggacaagtagaaattactgtaaactaaact
-16|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 820 bp
-ccggctcaatcctgtagaaccgcgtacaacacacccaagctataccgcac
-17|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 791 bp
-attgttagggcctgtccggaaaagatcaacggaagatattcaccagcacc
-18|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 328 bp
-accgattacaggcagtcggccttgtccgctcgtatatccagggatgttcc
-19|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp
-ggagtggaaaattctgtagtccgttggcggcgaccgcaaaccagaataat
-20|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 440 bp
-atcttaaacagcccaatcggctcgccgaccaatttcccgcttcacagtac
-21|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 840 bp
-cataactcgtgagtggccctgtacaagtcattgcatcacaatccttgcaa
-22|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 234 bp
-caccgcgaaagtgactcagttttcccggtcttatcacggtcgttgtcgtc
-23|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 917 bp
-atcaagtgattacctggtaacccgccgctcttgcagtgttcaccctttgt
-24|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 676 bp
-cacacggcatcgcaaagcgagctatccagagatgatacatgtggttgaag
-25|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 870 bp
-ctgatcaccaatagcttgcgcttaacacacgcgccttacaattatatgac
-26|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 751 bp
-gggtgcgttatggggactaaagactgttactaccggtactccgccttata
-27|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 574 bp
-gtactgcaccttgcactgctatctacaatgccgagggtcgccctagtgct
-28|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 169 bp
-agctccctaaacaacacccgcgtaaaaccttcagttatggtgccgactaa
-29|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 408 bp
-tgcagtgatgcatcgataagaccgcatagttacctccttacaggtgacgc
-30|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 52 bp
-caaagcgattcgggttaacgcacttaagagttcgacgtaggttagtcccc
-31|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 581 bp
-tgctctgacgtgtaagcgccttcgataacgtctttgcagcgccccacaaa
-32|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp
-gcggaactacctctctaagaccgcacaacaagtgtagtagatgaagatca
-33|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 297 bp
-gccctcgcgccagcttacttttagaaaacatcgaccggtaagagatacct
-34|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 573 bp
-gcctaggggtcttgaccacagggagtacgagcattgatcattggagcagg
-35|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 559 bp
-gaaaaagtcgccccattcagttacaatcgtcttcagaagccagctcggtt
-36|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 187 bp
-ctaagtccttatctatgatgcatctttcgttactgcgacaatatccgaga
-37|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 549 bp
-ttcatatggggatttggaatcgggtttgtgcggaatatgcccacgagact
-38|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 916 bp
-gtggcctaccataaatcaatttgggttaacgctctttgatctacgcacta
-39|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 848 bp
-accccggtcgctttggccggtcgtagccctaatcaattctgttcgtatca
-40|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 289 bp
-agagcaaaagaaagtctgctccgcgtgacacacttgctcgttgtagtaac
-41|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 642 bp
-ctggggaatgaccgtaccgatctaattccccgtcgaaaaacttatgacgc
-42|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 993 bp
-gcaagaagccaaaaaccttgcaggaggtcatttaagtttacccgcgcata
-43|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 473 bp
-ggttgtccaggcgcgagcaagtagctgactcgctaatcttaacgagtatt
-44|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 272 bp
-gtagaacttgttccccatggacaatgctagttccgttaatgccaggtatt
-45|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 860 bp
-aaaagcatcactctaacgacgctaccgtctgaatagatcaagattgctat
-46|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 884 bp
-aagcctctacaggctctgcggtttggctttacttaacggtgagtcaggaa
-47|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 888 bp
-gcgccttgaagaggcgaggtctaaaggcaaaaatttagatccgccctatg
-48|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 588 bp
-catcaagatgggttacgtaggaccgagattcagtctctgggttagagccg
-49|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 626 bp
-taacctcagtctcgttcccccctcggtagttcggacccttattcgcttat
-50|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 214 bp
-taactgtcggtcactgctcatcccgactagttcggctcactagacttact
+0: length 103 nt
+ACGCCCTATGTCAGAGTGGTGTTAGTGCACAATTCATTCCGATATGCGAA
+1: length 35 nt
+ATAACGAGCGCGAGCCCTTAACGGGCAGGACTGCCGACTCACACCTTAAT
+2: length 61 nt
+GGTCTCATAAGCACTTCGGGTACATCCTCCTAAATGGGCCGCGACTTAGG
+3: length 54 nt
+TAGGTCTCGGATACGAGATATAACCGTAGCATGGGAGTGACACCCATGCC
+4: length 64 nt
+TCTGCGATACCCAATTTAGGGCATGGCATAAGGCCCGGCGGGTATTTCCG
+5: length 85 nt
+TTGTTAAGCGTGTTGGTAGCGCCCCCAAATTGTCCCAACTGGGCCACACC
+6: length 53 nt
+ATAGTACAAGTTAGACCTCATGCATTGAGCTGCCCGGTTCGGACCTTAAT
+7: length 27 nt
+TAAAATATTAAGGATAGCCGGAACGCGCTGGCGACTGACGACACTACACG
+8: length 25 nt
+TAGATTCCAGGGAATGACGTCACACCGTTCCGTATATCAGCGGCGCGGAA
+9: length 52 nt
+GGTGCACCTATTAACAGAGTGACTTTGGGGGAATCACTTATTCGCATATA
+10: length 41 nt
+AACGACAAGCTGAACGAGCTCTGGCGGCTACGCAGTGTTCGCGCAAGCGA
+11: length 106 nt
+GTAATAAACTAAACATCAGCTGAGGCGTCCGACCACTGCATCTTTTGTTG
+12: length 5 nt
+GTAATATGGTATAGCTCGACCCTCGTACAACGGGGGTCAGACAGCGTTGC
+13: length 49 nt
+TTACTTTGCACGCATGCATGGAGGCACCATGAAAGACGGGAAGCGGCTAA
+14: length 22 nt
+GCAAGGGGTTTGAGGGAGCAACGTAAGATCAGAGATAACAGTCTCAAGAG
+15: length 109 nt
+AATTGAAGTCAGCAAAGAATTGGGATTGCTGGAAGCAACACGGACACGTT
+16: length 43 nt
+GAACCATTCTAAACAGCACCACCCGCATCCCGCTTTGCGCAGCAGCCACT
+17: length 85 nt
+ATGTCTCACAGGGGAGGTCTCCAGGCTTCTACCAAGTTCCCAATTCCATG
+18: length 16 nt
+ATCTCATCAATTATGTTGTACACCTGCTCGTTCTAACTTGCAGACTGCAT
+19: length 52 nt
+GAGTATCGCAGTATACTGTTGGCGGACATTCCAGAATATTGGTATTCAGC
+20: length 2 nt
+ATATAGCTACCCCAGATTGTCTTAACGAGTCCCGCACCCCCCACCAAGAT
+21: length 13 nt
+GTATTTACCTGCCCACTAGTACAAGTCCCCGAACCTTAGCAATCCTAAGT
+22: length 64 nt
+TGGCCGTTTCAGAAGGATTCCTGCCCCTGTGATGAGTCACGTGGAGACAG
+23: length 59 nt
+CGAGGAGAGCTGTAAAATACTGCTGCCTGCTACATTGGGGCTTGGGTCTC
+24: length 82 nt
+GCACAGTAGCGAGTGACTCGGTTACTATGTGTTGCTGACTTTATATGTGC
+25: length 6 nt
+GAGTAAAGCTGAAACACCGAACCGGCAGTTAGAGCTAGTGGATTTTCTTA
+26: length 87 nt
+AGACATTTTCGGAACCAATGATCTTGTTGGAGTTGAGAGGCCGAACCAGG
+27: length 136 nt
+AACACGGTAATATCAGGTCGGCCTTAGTCACTTCGGAATTGGAGTTAGTA
+28: length 68 nt
+CACCGGTAGCTGAATGTAACGAATCTACGTTGACACGGTTAAAGGGATAT
+29: length 84 nt
+AGATTGTCATGGACTGACGGAGAACACCGTGCTTAAAGTGGAATAATGTG
+30: length 51 nt
+CGGCAAGGAGGAGGCTACTGACCAATCAGACAAACGTTCATGGCTAGAGA
+31: length 51 nt
+AGCAGCCGCTCAACTGTGACGTCGGCTGAGTCTACCAGCCCTCAGACTAC
+32: length 73 nt
+TCCGATCTTATAGTAGTGATCCTGATGGAACAATAACTGACCCAGTAAAG
+33: length 58 nt
+ACGTTTTATTGTACCTAGGTGTGGCACCATGAAAGGCAGGAGGCCCACCG
+34: length 28 nt
+AATCCGCAATCGTGCTACGCATTTTCCAGACAATATTTCGAGGTTACCGC
+35: length 13 nt
+AACACGAGGCCCTCTCGACGCTGCTATTTAACAGCGCGTGCCTATTGAAA
+36: length 2 nt
+ATACCGAAACTTACTAGTTGTGACAGCGGATTCCAAACTGCGGGTGAATA
+37: length 71 nt
+GTGAGGAGTACTCGGCACCGGGAACGTAGTAATGTGCTCTGATCCACTCT
+38: length 46 nt
+CGAACGTAAATAAGTCTGCTGCTGCCTCAGCAGAGGGATGGGAGGGGTCT
+39: length 48 nt
+CGCAAGAGTGTCGAGCTGGCAGACGCGTATTGCCTTTATATCCGTGAAAC
+40: length 63 nt
+TGACAAACATAAGTTTGTCGCCCGTTTCCACATAGTAGAATTCATTAGAA
+41: length 11 nt
+AGAAATAGTACGTAGTGTTGGCTGTGAATATTCGGAGATAAGGACTCATT
+42: length 68 nt
+GGTTCACAAATTGTCATAGATCATCGTGGTACGACGGGCGATCTTATTTC
+43: length 7 nt
+CGAGCGTGATGACGTAATGGAGTGAATAAGGAGTGCGAAATGGTGCGTTT
+44: length 84 nt
+TGGGACGATCATTCCCCCTGACCTTACGTACAATGCAATTGCGAGTGGCT
+45: length 23 nt
+TTGTACTTACGGCTTAAATTGCACAGAAGGGGTTCTCCGGAAGATAACTA
+46: length 58 nt
+GGCCCTTCGGTGCAGTCGGCCTAGGTATCAAGCTCGCCTGCCCGTGTTAT
+47: length 50 nt
+TATCGTGGTTATGTGCGGACCGCCTGATATTACAGCTGATGTGAGGGCAA
+48: length 63 nt
+TCTGCTAGGGATCCACTCCTGGGTCATTATTAAGCACTTACGGCCTGAGA
+49: length 48 nt
+GGAGAATTCGCGGCATAAGATCTCCATACGGGTAGTATGTGCCACTATTA
+50: length 64 nt
+AATGCGTACGGCCCAAAAACCTTCAGGATTACCATGAACCCGTAACTCAA
+51: length 41 nt
+AGTCATATAATGATTGAGTTCCCTCAACCCTCCGGACGTAAACACTCCGG
+52: length 44 nt
+TCCAGATGGCCTGGCAATCAGGTGTTGACGGGAACCGCACCCCTGCTGCT
+53: length 62 nt
+AGGTTGTCTGAATCACTCGGGAGACTTCCCCGCACGATATCGGCACCTGA
+54: length 73 nt
+CCGTACGGAACATCCTGCAGCTTCGCCCGGTCGAGAAGAAGGTAGAGATA
+55: length 39 nt
+TGCGCGGCGGACTCTGTCATTTATGACCTGCTTGAACGATGAGTGGGCCA
+56: length 88 nt
+CGATCCATAGGGCACGCTAGTTTGCATTTTAAGGAGTGCCAATTACGAGG
+57: length 117 nt
+TATTGCGTCCGTTGCCTGCTCCATCCGTTGCACTATGCCATAAGGCAACA
+58: length 67 nt
+TTAACGCACGATCAACGGTTCATAATCTGCGGTGATCCCAGAGCTCTACA
+59: length 84 nt
+CTGACCCTCGATGTGTTTAATAGTTGTATTTCGGCCCCAGTAGGCTCGAC
+60: length 71 nt
+CTGTCAGATGCGCATGTTCGTGGTCCATTGTCAGCCAAATTAAAGGTTGC
+61: length 102 nt
+TACATGCTCATATGAACTGACATTGAGTGGAAGGTAGCGACCGCGAGGTT
+62: length 66 nt
+ACATGATCTTTGTCGAATACTCAAAATGCACAGTGCCTAACTATTTGGGT
+63: length 22 nt
+GTCACGACACGCATCTACACTAAACAGCACCACATGTTAGAGATGAGATC
+64: length 46 nt
+CGTGGTCGTGAAGACCCACTATATTAGCAAGAGTCGCGATAGATGGGTGT
+65: length 78 nt
+CTAGGTAATTTTGAGAGACACCCCGACGCACGGATGTTCCGTACAAAGAT
+66: length 32 nt
+CAGTCTGAATGTATCAACTAAGAGAATCGTACGAAAATAGACCTAATCGA
+67: length 42 nt
+GCTCCGCACACTAGATAGCTGATGCCCACATACTCATTCATTATTCGATG
+68: length 49 nt
+GTCTGAACCAGACACGCACTACGGCTAGCACCTATCCTTATCCTAAAACG
+69: length 41 nt
+GAGTTACGACCGGACGGGTTGTTAGGTCACCCCGGAATCAGACGATTTAC
+70: length 29 nt
+GTAGGCCAACGCGAACTACTCCTCCCGCCCACAGTCTAGACTAAGCGTTG
+71: length 94 nt
+GGTTTAGACTGTCTGCAACTATGTTACTGGATATACCTAATGACAGGGGT
+72: length 48 nt
+TAAGAATGTTATTGGGCGGTAGCTTACTCTATTGGAATTGAGAAAATGTA
+73: length 63 nt
+CTAGTATGTTTTATTGGATGCTTGAGGTAGGAATAATCACATCGGGCCGA
+74: length 70 nt
+ACAAACCTCCTCCCCGGCGACGGGCAACGCCCGCATTGCAACATGCAAGA
+75: length 58 nt
+GGCTCCATTTCGCTACCCTCTCCCCTAAATCAGGTCCGCGGAGTTGTCGT
+76: length 98 nt
+ACATACCTATGCCTCAGATCGGTTCGGCTCTTGAGGCGACACCCGTGATA
+77: length 34 nt
+TTAGAAAGGAACTCATTTTAAGCAAGTTATGCCAGGTTAGCCCGCCCTGG
+78: length 48 nt
+AAGGCTCGCCGTGACCAGTAGTGCGCAACCTTTCAAGCCGTGTAATATTT
+79: length 51 nt
+CCTTAAGGGTGGTTTGCACCGTAAAGACCCTCTCTTCCACTTTCCCCGAA
+80: length 30 nt
+GCCTCGACGTGCCGGACGATGGGCGCTGATCGAAAGTCCATTGTCCCCCT
+81: length 14 nt
+AGGGCAGTGTACACACACGGCAAGGGTTGCGATGGGTAGAGGCTAAGACG
+82: length 39 nt
+AATGGGACTTAGTAAACAGGCTGGCCGCCTAGACTGTGTGTGAATGCCCA
+83: length 29 nt
+AGCTTGGACTTAGAGAAAATCAGTCTACAAGACATAGGCTTTAAAGAGGA
+84: length 15 nt
+ACGGAAAGTAAGTCTCTCGTCCCTGGTTAAAGGGTCAGTCCCTCCCACCT
+85: length 20 nt
+CCCAGAGCAAACTGGACCTCCCACCGGCAGATCATCGTTTATTATTATAC
+86: length 64 nt
+CGTGGCCAGTACATGCTGATGATCCCTATTACAGACCTCGCGTGTTGAGA
+87: length 52 nt
+GTTAGGCTGTACTGAATACCATGAGCATGTGGGTAGGTTGTACTGGAACC
+88: length 60 nt
+CGCCGGGCTCCCCGAAGTATAGAGCCAGCGGTTAAATAACTTATCTGATC
+89: length 32 nt
+CTTGTACGTCGAGTATGAGAGGGGTTACACTTTCTGCTTGCTAAGTTCAG
+90: length 14 nt
+AAACAGATTAATAACGTAATACCCCCGCTCTACGCCCCTGCATTACGGTT
+91: length 21 nt
+GTAGTTCGTAGCAGGGGATTGTCATAAATCAGCGCCTGATGGGAAGCGTT
+92: length 47 nt
+TGCCCAGATGTAACACATCCCGGCCCATTCTGGTAACTCCGTTTCTGGGT
+93: length 47 nt
+AATTAAGGGGTTGTCAAACCGGCCACTTTTGAACAATGGGCTTCGCCGCC
+94: length 42 nt
+TTCAACCTTTACGATGCAAACATAGACAAACTTGGACGTATTCGGACCCT
+95: length 35 nt
+GGCGCTTAGTGGAATGCCCCCCCGTATGCAGCATCGAGTATTCCGTAGCG
+96: length 54 nt
+TTGCTGATCTTTCGAATTTGGATCTGGGATTTTGCCAGATAGCGCCCGTA
+97: length 34 nt
+TACGTAGATATTTTCAGGAGGCACTGAACAGCCCAGAGCCTCCATCGGGC
+98: length 15 nt
+ATTGCGCAGGTATGGCTCAGAGACACAGTATACTATTACCTTCCCGCAAC
+99: length 42 nt
+CGATCTGGCATCGGCATCCGAGTAGCCCCTACGTATGGCTTTGCCCAACA
diff --git a/read_sequencer_package/modules.py b/read_sequencer_package/modules.py
index 4859423..f924589 100644
--- a/read_sequencer_package/modules.py
+++ b/read_sequencer_package/modules.py
@@ -1,7 +1,7 @@
 import logging
 LOG = logging.getLogger(__name__)
 
-def read_in_fasta(file_path):
+def read_in_fasta(file_path: str) -> dict[str,str]:
     """
     This function reads in FASTA files.
 
@@ -10,26 +10,25 @@ def read_in_fasta(file_path):
 
     Returns:
         Dict: It returns a dictionary with sequences.
-
     """
     LOG.info("Reading in FASTA files from destination.")
-    sequences = {}
+    sequences: dict[str,str] = {}
     f = open(file_path)
     for line in f:
         if line[0] == '>':
-            defline = line.strip()
-            defline = defline.replace('>', '')
+            def_line = line.strip()
+            def_line = def_line.replace('>', '')
         else:
-            if defline not in sequences:
-                sequences[defline] = ''
-                sequences[defline] += line.strip()
+            if def_line not in sequences:
+                sequences[def_line] = ''
+                sequences[def_line] += line.strip()
     f.close()
     return sequences
 
-def read_sequence(seq, read_length):
+def read_sequence(seq:str, read_length:int) -> str:
     """
     This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is 
-    smaller then the requested length or cuts the sequence if its longer.
+    smaller than the requested length or cuts the sequence if it is longer.
 
     Args:
         seq (str): the sequence to read 
@@ -40,8 +39,8 @@ def read_sequence(seq, read_length):
 
     """
     from random import choice
-    bases = ["A", "T", "C", "G"]
-    sequenced = ''
+    bases: list[str] = ["A", "T", "C", "G"]
+    sequenced: str = ''
     if read_length > len(seq):
         for nt in range(len(seq)):
             sequenced += seq[nt]
@@ -53,7 +52,7 @@ def read_sequence(seq, read_length):
 
     return sequenced
 
-def simulate_sequencing(sequences, read_length):
+def simulate_sequencing(sequences: dict[str,str], read_length: int) -> dict[str,str]:
     """
     Simulates sequencing.
 
@@ -65,38 +64,37 @@ def simulate_sequencing(sequences, read_length):
         dict: of n sequences as values 
     """
     LOG.info("Sequencing in progress....")
-    results = {}
+    results: dict[str,str] = {}
     for index, key in enumerate(sequences):
         results[key] = read_sequence(sequences[key], read_length=read_length)
     LOG.info("Sequencing was successfully executed.")
     return results
 
-
-def generate_sequences(n, mean, sd):
+def generate_sequences(n: int, mean: int, sd: int) -> dict[str,str]:
     """
     Generates random sequences.
 
     Args:
         n (int): Amount of sequences to generate.
         mean (int): mean length of sequence (gaussian distribution).
-        sd (float): standart deviation of length of sequence (gaussian distribution).
+        sd (float): standard deviation of length of sequence (gaussian distribution).
 
     Returns:
         dict: of n sequences
     """
     from random import choice, gauss
     LOG.info("Generating random sequences.")
-    dict = {}
+    sequences: dict[str,str] = {}
     for i in range(n):
-        seq = ""
-        nt = ["A", "T", "C", "G"]
-        for value in range(abs(round(gauss(mean, sd)))):
-            seq = seq + choice(nt)
-        key = str(i) + ': length ' + str(len(seq)) + ' nt'
-        dict[key] = seq
-    return dict
-
-def write_fasta(sequences, file_path):
+        seq: str = ""
+        bases: list[str] = ["A", "T", "C", "G"]
+        for nt in range(abs(round(gauss(mean, sd)))):
+            seq = seq + choice(bases)
+        key: str = str(i) + ': length ' + str(len(seq)) + ' nt'
+        sequences[key] = seq
+    return sequences
+
+def write_fasta(sequences: dict[str,str], file_path: str):
     """
     Takes a dictionary and writes it to a fasta file.
     Must specify the filename when calling the function.
@@ -116,17 +114,17 @@ def write_fasta(sequences, file_path):
 
 class ReadSequencer:
     def __init__(self):
-        self.sequences = {}
-        self.reads = {}
+        self.sequences: dict[str,str] = {}
+        self.reads: dict[str,str] = {}
 
-    def add_random_sequences(self, n, mean, sd):
-        self.sequences = generate_sequences(n, mean, sd)
+    def add_random_sequences(self, n: int, mean: int, sd: int):
+        self.sequences: dict[str,str] = generate_sequences(n, mean, sd)
 
     def read_fasta(self, input_file):
-        self.sequences = read_in_fasta(input_file)
+        self.sequences: dict[str,str] = read_in_fasta(input_file)
 
-    def run_sequencing(self, read_length):
-        self.reads = simulate_sequencing(self.sequences, read_length)
+    def run_sequencing(self, read_length: int):
+        self.reads: dict[str,str] = simulate_sequencing(self.sequences, read_length)
 
-    def write_fasta(self, output_file_path):
+    def write_fasta(self, output_file_path: str):
         write_fasta(self.reads, output_file_path)
-- 
GitLab