diff --git a/read_sequencer_package/fasta_testfile/result.fasta b/read_sequencer_package/fasta_testfile/result.fasta index 1aedc2c89f071f5db1dc1a74dde1cd6211a30a93..32258d65e587dd9e7501d585c137d799103c60f8 100644 --- a/read_sequencer_package/fasta_testfile/result.fasta +++ b/read_sequencer_package/fasta_testfile/result.fasta @@ -1,100 +1,200 @@ -1|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 481 bp -tgagcactcggtgccaagggcggggatacacagatggttggctgatacaa -2|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 495 bp -ctgaatcaggtgtaggttctttttacgtcgtttaaggagctacacggtat -3|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 193 bp -acttcagtactggaaggatctaggaaccattaatgcgagtgtggtgacgc -4|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 625 bp -acgtctggagcgtgggttgacccctgtacatggttctttccggatcctta -5|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 845 bp -agagcgtacggcgcgcatcgtataccctacgagggcggcgtgtggaggaa -6|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 703 bp -tgcagtcgatgtgctattcgttttaggcagtctacgcgcttagtaactcc -7|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 243 bp -actctttagaatgggtttcactaatagtacgtgcatacaatttcgtcaga -8|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 863 bp -attggcccggtccaggacagagccttatattgctactggtatgagaaccg -9|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 494 bp -aagcgaaactcctagaacttcccatcaggcaatcgtgtcccacgaagcac -10|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 86 bp -atcctagcgccaaagatttactgttatggggtcgacgaacactagccgat -11|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 360 bp -cgcctgagggtcctaaatctgacgtatgatcgaagagattggaaggtccc -12|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 140 bp -gaattcctggggatttactcacccccgaggcggacaagatttccagctgg -13|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 832 bp -aatactctcgttgaagcgtcggacagtaaagtgagagatttcggcccacg -14|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 296 bp -atcggggtgcgaaatcccctgagctggttgactacatacgtaaccacgtt -15|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 515 bp -accttcaatttgttcgcccgggacaagtagaaattactgtaaactaaact -16|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 820 bp -ccggctcaatcctgtagaaccgcgtacaacacacccaagctataccgcac -17|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 791 bp -attgttagggcctgtccggaaaagatcaacggaagatattcaccagcacc -18|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 328 bp -accgattacaggcagtcggccttgtccgctcgtatatccagggatgttcc -19|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp -ggagtggaaaattctgtagtccgttggcggcgaccgcaaaccagaataat -20|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 440 bp -atcttaaacagcccaatcggctcgccgaccaatttcccgcttcacagtac -21|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 840 bp -cataactcgtgagtggccctgtacaagtcattgcatcacaatccttgcaa -22|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 234 bp -caccgcgaaagtgactcagttttcccggtcttatcacggtcgttgtcgtc -23|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 917 bp -atcaagtgattacctggtaacccgccgctcttgcagtgttcaccctttgt -24|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 676 bp -cacacggcatcgcaaagcgagctatccagagatgatacatgtggttgaag -25|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 870 bp -ctgatcaccaatagcttgcgcttaacacacgcgccttacaattatatgac -26|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 751 bp -gggtgcgttatggggactaaagactgttactaccggtactccgccttata -27|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 574 bp -gtactgcaccttgcactgctatctacaatgccgagggtcgccctagtgct -28|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 169 bp -agctccctaaacaacacccgcgtaaaaccttcagttatggtgccgactaa -29|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 408 bp -tgcagtgatgcatcgataagaccgcatagttacctccttacaggtgacgc -30|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 52 bp -caaagcgattcgggttaacgcacttaagagttcgacgtaggttagtcccc -31|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 581 bp -tgctctgacgtgtaagcgccttcgataacgtctttgcagcgccccacaaa -32|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp -gcggaactacctctctaagaccgcacaacaagtgtagtagatgaagatca -33|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 297 bp -gccctcgcgccagcttacttttagaaaacatcgaccggtaagagatacct -34|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 573 bp -gcctaggggtcttgaccacagggagtacgagcattgatcattggagcagg -35|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 559 bp -gaaaaagtcgccccattcagttacaatcgtcttcagaagccagctcggtt -36|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 187 bp -ctaagtccttatctatgatgcatctttcgttactgcgacaatatccgaga -37|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 549 bp -ttcatatggggatttggaatcgggtttgtgcggaatatgcccacgagact -38|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 916 bp -gtggcctaccataaatcaatttgggttaacgctctttgatctacgcacta -39|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 848 bp -accccggtcgctttggccggtcgtagccctaatcaattctgttcgtatca -40|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 289 bp -agagcaaaagaaagtctgctccgcgtgacacacttgctcgttgtagtaac -41|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 642 bp -ctggggaatgaccgtaccgatctaattccccgtcgaaaaacttatgacgc -42|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 993 bp -gcaagaagccaaaaaccttgcaggaggtcatttaagtttacccgcgcata -43|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 473 bp -ggttgtccaggcgcgagcaagtagctgactcgctaatcttaacgagtatt -44|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 272 bp -gtagaacttgttccccatggacaatgctagttccgttaatgccaggtatt -45|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 860 bp -aaaagcatcactctaacgacgctaccgtctgaatagatcaagattgctat -46|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 884 bp -aagcctctacaggctctgcggtttggctttacttaacggtgagtcaggaa -47|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 888 bp -gcgccttgaagaggcgaggtctaaaggcaaaaatttagatccgccctatg -48|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 588 bp -catcaagatgggttacgtaggaccgagattcagtctctgggttagagccg -49|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 626 bp -taacctcagtctcgttcccccctcggtagttcggacccttattcgcttat -50|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 214 bp -taactgtcggtcactgctcatcccgactagttcggctcactagacttact +0: length 103 nt +ACGCCCTATGTCAGAGTGGTGTTAGTGCACAATTCATTCCGATATGCGAA +1: length 35 nt +ATAACGAGCGCGAGCCCTTAACGGGCAGGACTGCCGACTCACACCTTAAT +2: length 61 nt +GGTCTCATAAGCACTTCGGGTACATCCTCCTAAATGGGCCGCGACTTAGG +3: length 54 nt +TAGGTCTCGGATACGAGATATAACCGTAGCATGGGAGTGACACCCATGCC +4: length 64 nt +TCTGCGATACCCAATTTAGGGCATGGCATAAGGCCCGGCGGGTATTTCCG +5: length 85 nt +TTGTTAAGCGTGTTGGTAGCGCCCCCAAATTGTCCCAACTGGGCCACACC +6: length 53 nt +ATAGTACAAGTTAGACCTCATGCATTGAGCTGCCCGGTTCGGACCTTAAT +7: length 27 nt +TAAAATATTAAGGATAGCCGGAACGCGCTGGCGACTGACGACACTACACG +8: length 25 nt +TAGATTCCAGGGAATGACGTCACACCGTTCCGTATATCAGCGGCGCGGAA +9: length 52 nt +GGTGCACCTATTAACAGAGTGACTTTGGGGGAATCACTTATTCGCATATA +10: length 41 nt +AACGACAAGCTGAACGAGCTCTGGCGGCTACGCAGTGTTCGCGCAAGCGA +11: length 106 nt +GTAATAAACTAAACATCAGCTGAGGCGTCCGACCACTGCATCTTTTGTTG +12: length 5 nt +GTAATATGGTATAGCTCGACCCTCGTACAACGGGGGTCAGACAGCGTTGC +13: length 49 nt +TTACTTTGCACGCATGCATGGAGGCACCATGAAAGACGGGAAGCGGCTAA +14: length 22 nt +GCAAGGGGTTTGAGGGAGCAACGTAAGATCAGAGATAACAGTCTCAAGAG +15: length 109 nt +AATTGAAGTCAGCAAAGAATTGGGATTGCTGGAAGCAACACGGACACGTT +16: length 43 nt +GAACCATTCTAAACAGCACCACCCGCATCCCGCTTTGCGCAGCAGCCACT +17: length 85 nt +ATGTCTCACAGGGGAGGTCTCCAGGCTTCTACCAAGTTCCCAATTCCATG +18: length 16 nt +ATCTCATCAATTATGTTGTACACCTGCTCGTTCTAACTTGCAGACTGCAT +19: length 52 nt +GAGTATCGCAGTATACTGTTGGCGGACATTCCAGAATATTGGTATTCAGC +20: length 2 nt +ATATAGCTACCCCAGATTGTCTTAACGAGTCCCGCACCCCCCACCAAGAT +21: length 13 nt +GTATTTACCTGCCCACTAGTACAAGTCCCCGAACCTTAGCAATCCTAAGT +22: length 64 nt +TGGCCGTTTCAGAAGGATTCCTGCCCCTGTGATGAGTCACGTGGAGACAG +23: length 59 nt +CGAGGAGAGCTGTAAAATACTGCTGCCTGCTACATTGGGGCTTGGGTCTC +24: length 82 nt +GCACAGTAGCGAGTGACTCGGTTACTATGTGTTGCTGACTTTATATGTGC +25: length 6 nt +GAGTAAAGCTGAAACACCGAACCGGCAGTTAGAGCTAGTGGATTTTCTTA +26: length 87 nt +AGACATTTTCGGAACCAATGATCTTGTTGGAGTTGAGAGGCCGAACCAGG +27: length 136 nt +AACACGGTAATATCAGGTCGGCCTTAGTCACTTCGGAATTGGAGTTAGTA +28: length 68 nt +CACCGGTAGCTGAATGTAACGAATCTACGTTGACACGGTTAAAGGGATAT +29: length 84 nt +AGATTGTCATGGACTGACGGAGAACACCGTGCTTAAAGTGGAATAATGTG +30: length 51 nt +CGGCAAGGAGGAGGCTACTGACCAATCAGACAAACGTTCATGGCTAGAGA +31: length 51 nt +AGCAGCCGCTCAACTGTGACGTCGGCTGAGTCTACCAGCCCTCAGACTAC +32: length 73 nt +TCCGATCTTATAGTAGTGATCCTGATGGAACAATAACTGACCCAGTAAAG +33: length 58 nt +ACGTTTTATTGTACCTAGGTGTGGCACCATGAAAGGCAGGAGGCCCACCG +34: length 28 nt +AATCCGCAATCGTGCTACGCATTTTCCAGACAATATTTCGAGGTTACCGC +35: length 13 nt +AACACGAGGCCCTCTCGACGCTGCTATTTAACAGCGCGTGCCTATTGAAA +36: length 2 nt +ATACCGAAACTTACTAGTTGTGACAGCGGATTCCAAACTGCGGGTGAATA +37: length 71 nt +GTGAGGAGTACTCGGCACCGGGAACGTAGTAATGTGCTCTGATCCACTCT +38: length 46 nt +CGAACGTAAATAAGTCTGCTGCTGCCTCAGCAGAGGGATGGGAGGGGTCT +39: length 48 nt +CGCAAGAGTGTCGAGCTGGCAGACGCGTATTGCCTTTATATCCGTGAAAC +40: length 63 nt +TGACAAACATAAGTTTGTCGCCCGTTTCCACATAGTAGAATTCATTAGAA +41: length 11 nt +AGAAATAGTACGTAGTGTTGGCTGTGAATATTCGGAGATAAGGACTCATT +42: length 68 nt +GGTTCACAAATTGTCATAGATCATCGTGGTACGACGGGCGATCTTATTTC +43: length 7 nt +CGAGCGTGATGACGTAATGGAGTGAATAAGGAGTGCGAAATGGTGCGTTT +44: length 84 nt +TGGGACGATCATTCCCCCTGACCTTACGTACAATGCAATTGCGAGTGGCT +45: length 23 nt +TTGTACTTACGGCTTAAATTGCACAGAAGGGGTTCTCCGGAAGATAACTA +46: length 58 nt +GGCCCTTCGGTGCAGTCGGCCTAGGTATCAAGCTCGCCTGCCCGTGTTAT +47: length 50 nt +TATCGTGGTTATGTGCGGACCGCCTGATATTACAGCTGATGTGAGGGCAA +48: length 63 nt +TCTGCTAGGGATCCACTCCTGGGTCATTATTAAGCACTTACGGCCTGAGA +49: length 48 nt +GGAGAATTCGCGGCATAAGATCTCCATACGGGTAGTATGTGCCACTATTA +50: length 64 nt +AATGCGTACGGCCCAAAAACCTTCAGGATTACCATGAACCCGTAACTCAA +51: length 41 nt +AGTCATATAATGATTGAGTTCCCTCAACCCTCCGGACGTAAACACTCCGG +52: length 44 nt +TCCAGATGGCCTGGCAATCAGGTGTTGACGGGAACCGCACCCCTGCTGCT +53: length 62 nt +AGGTTGTCTGAATCACTCGGGAGACTTCCCCGCACGATATCGGCACCTGA +54: length 73 nt +CCGTACGGAACATCCTGCAGCTTCGCCCGGTCGAGAAGAAGGTAGAGATA +55: length 39 nt +TGCGCGGCGGACTCTGTCATTTATGACCTGCTTGAACGATGAGTGGGCCA +56: length 88 nt +CGATCCATAGGGCACGCTAGTTTGCATTTTAAGGAGTGCCAATTACGAGG +57: length 117 nt +TATTGCGTCCGTTGCCTGCTCCATCCGTTGCACTATGCCATAAGGCAACA +58: length 67 nt +TTAACGCACGATCAACGGTTCATAATCTGCGGTGATCCCAGAGCTCTACA +59: length 84 nt +CTGACCCTCGATGTGTTTAATAGTTGTATTTCGGCCCCAGTAGGCTCGAC +60: length 71 nt +CTGTCAGATGCGCATGTTCGTGGTCCATTGTCAGCCAAATTAAAGGTTGC +61: length 102 nt +TACATGCTCATATGAACTGACATTGAGTGGAAGGTAGCGACCGCGAGGTT +62: length 66 nt +ACATGATCTTTGTCGAATACTCAAAATGCACAGTGCCTAACTATTTGGGT +63: length 22 nt +GTCACGACACGCATCTACACTAAACAGCACCACATGTTAGAGATGAGATC +64: length 46 nt +CGTGGTCGTGAAGACCCACTATATTAGCAAGAGTCGCGATAGATGGGTGT +65: length 78 nt +CTAGGTAATTTTGAGAGACACCCCGACGCACGGATGTTCCGTACAAAGAT +66: length 32 nt +CAGTCTGAATGTATCAACTAAGAGAATCGTACGAAAATAGACCTAATCGA +67: length 42 nt +GCTCCGCACACTAGATAGCTGATGCCCACATACTCATTCATTATTCGATG +68: length 49 nt +GTCTGAACCAGACACGCACTACGGCTAGCACCTATCCTTATCCTAAAACG +69: length 41 nt +GAGTTACGACCGGACGGGTTGTTAGGTCACCCCGGAATCAGACGATTTAC +70: length 29 nt +GTAGGCCAACGCGAACTACTCCTCCCGCCCACAGTCTAGACTAAGCGTTG +71: length 94 nt +GGTTTAGACTGTCTGCAACTATGTTACTGGATATACCTAATGACAGGGGT +72: length 48 nt +TAAGAATGTTATTGGGCGGTAGCTTACTCTATTGGAATTGAGAAAATGTA +73: length 63 nt +CTAGTATGTTTTATTGGATGCTTGAGGTAGGAATAATCACATCGGGCCGA +74: length 70 nt +ACAAACCTCCTCCCCGGCGACGGGCAACGCCCGCATTGCAACATGCAAGA +75: length 58 nt +GGCTCCATTTCGCTACCCTCTCCCCTAAATCAGGTCCGCGGAGTTGTCGT +76: length 98 nt +ACATACCTATGCCTCAGATCGGTTCGGCTCTTGAGGCGACACCCGTGATA +77: length 34 nt +TTAGAAAGGAACTCATTTTAAGCAAGTTATGCCAGGTTAGCCCGCCCTGG +78: length 48 nt +AAGGCTCGCCGTGACCAGTAGTGCGCAACCTTTCAAGCCGTGTAATATTT +79: length 51 nt +CCTTAAGGGTGGTTTGCACCGTAAAGACCCTCTCTTCCACTTTCCCCGAA +80: length 30 nt +GCCTCGACGTGCCGGACGATGGGCGCTGATCGAAAGTCCATTGTCCCCCT +81: length 14 nt +AGGGCAGTGTACACACACGGCAAGGGTTGCGATGGGTAGAGGCTAAGACG +82: length 39 nt +AATGGGACTTAGTAAACAGGCTGGCCGCCTAGACTGTGTGTGAATGCCCA +83: length 29 nt +AGCTTGGACTTAGAGAAAATCAGTCTACAAGACATAGGCTTTAAAGAGGA +84: length 15 nt +ACGGAAAGTAAGTCTCTCGTCCCTGGTTAAAGGGTCAGTCCCTCCCACCT +85: length 20 nt +CCCAGAGCAAACTGGACCTCCCACCGGCAGATCATCGTTTATTATTATAC +86: length 64 nt +CGTGGCCAGTACATGCTGATGATCCCTATTACAGACCTCGCGTGTTGAGA +87: length 52 nt +GTTAGGCTGTACTGAATACCATGAGCATGTGGGTAGGTTGTACTGGAACC +88: length 60 nt +CGCCGGGCTCCCCGAAGTATAGAGCCAGCGGTTAAATAACTTATCTGATC +89: length 32 nt +CTTGTACGTCGAGTATGAGAGGGGTTACACTTTCTGCTTGCTAAGTTCAG +90: length 14 nt +AAACAGATTAATAACGTAATACCCCCGCTCTACGCCCCTGCATTACGGTT +91: length 21 nt +GTAGTTCGTAGCAGGGGATTGTCATAAATCAGCGCCTGATGGGAAGCGTT +92: length 47 nt +TGCCCAGATGTAACACATCCCGGCCCATTCTGGTAACTCCGTTTCTGGGT +93: length 47 nt +AATTAAGGGGTTGTCAAACCGGCCACTTTTGAACAATGGGCTTCGCCGCC +94: length 42 nt +TTCAACCTTTACGATGCAAACATAGACAAACTTGGACGTATTCGGACCCT +95: length 35 nt +GGCGCTTAGTGGAATGCCCCCCCGTATGCAGCATCGAGTATTCCGTAGCG +96: length 54 nt +TTGCTGATCTTTCGAATTTGGATCTGGGATTTTGCCAGATAGCGCCCGTA +97: length 34 nt +TACGTAGATATTTTCAGGAGGCACTGAACAGCCCAGAGCCTCCATCGGGC +98: length 15 nt +ATTGCGCAGGTATGGCTCAGAGACACAGTATACTATTACCTTCCCGCAAC +99: length 42 nt +CGATCTGGCATCGGCATCCGAGTAGCCCCTACGTATGGCTTTGCCCAACA diff --git a/read_sequencer_package/modules.py b/read_sequencer_package/modules.py index 4859423ffc81ba0d49f122733043313315f0192f..f92458982cf1d57721279b44344fad2f262f80ee 100644 --- a/read_sequencer_package/modules.py +++ b/read_sequencer_package/modules.py @@ -1,7 +1,7 @@ import logging LOG = logging.getLogger(__name__) -def read_in_fasta(file_path): +def read_in_fasta(file_path: str) -> dict[str,str]: """ This function reads in FASTA files. @@ -10,26 +10,25 @@ def read_in_fasta(file_path): Returns: Dict: It returns a dictionary with sequences. - """ LOG.info("Reading in FASTA files from destination.") - sequences = {} + sequences: dict[str,str] = {} f = open(file_path) for line in f: if line[0] == '>': - defline = line.strip() - defline = defline.replace('>', '') + def_line = line.strip() + def_line = def_line.replace('>', '') else: - if defline not in sequences: - sequences[defline] = '' - sequences[defline] += line.strip() + if def_line not in sequences: + sequences[def_line] = '' + sequences[def_line] += line.strip() f.close() return sequences -def read_sequence(seq, read_length): +def read_sequence(seq:str, read_length:int) -> str: """ This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is - smaller then the requested length or cuts the sequence if its longer. + smaller than the requested length or cuts the sequence if its longer. Args: seq (str): the sequence to read @@ -40,8 +39,8 @@ def read_sequence(seq, read_length): """ from random import choice - bases = ["A", "T", "C", "G"] - sequenced = '' + bases: list[str] = ["A", "T", "C", "G"] + sequenced: str = '' if read_length > len(seq): for nt in range(len(seq)): sequenced += seq[nt] @@ -53,7 +52,7 @@ def read_sequence(seq, read_length): return sequenced -def simulate_sequencing(sequences, read_length): +def simulate_sequencing(sequences: dict[str,str], read_length: int) -> dict[str,str]: """ Simulates sequencing. @@ -65,38 +64,37 @@ def simulate_sequencing(sequences, read_length): dict: of n sequences as values """ LOG.info("Sequencing in progress....") - results = {} + results: dict[str,str] = {} for index, key in enumerate(sequences): results[key] = read_sequence(sequences[key], read_length=read_length) LOG.info("Sequencing was successfully executed.") return results - -def generate_sequences(n, mean, sd): +def generate_sequences(n: int, mean: int, sd: int) -> dict[str,str]: """ Generates random sequences. Args: n (int): Amount of sequences to generate. mean (int): mean length of sequence (gaussian distribution). - sd (float): standart deviation of length of sequence (gaussian distribution). + sd (float): standard deviation of length of sequence (gaussian distribution). Returns: dict: of n sequences """ from random import choice, gauss LOG.info("Generating random sequences.") - dict = {} + sequences: dict[str,str] = {} for i in range(n): - seq = "" - nt = ["A", "T", "C", "G"] - for value in range(abs(round(gauss(mean, sd)))): - seq = seq + choice(nt) - key = str(i) + ': length ' + str(len(seq)) + ' nt' - dict[key] = seq - return dict - -def write_fasta(sequences, file_path): + seq: str = "" + bases: list[str] = ["A", "T", "C", "G"] + for nt in range(abs(round(gauss(mean, sd)))): + seq = seq + choice(bases) + key: str = str(i) + ': length ' + str(len(seq)) + ' nt' + sequences[key] = seq + return sequences + +def write_fasta(sequences: dict[str,str], file_path: str): """ Takes a dictionary and writes it to a fasta file. Must specify the filename when calling the function. @@ -116,17 +114,17 @@ def write_fasta(sequences, file_path): class ReadSequencer: def __init__(self): - self.sequences = {} - self.reads = {} + self.sequences: dict[str,str] = {} + self.reads: dict[str,str] = {} - def add_random_sequences(self, n, mean, sd): - self.sequences = generate_sequences(n, mean, sd) + def add_random_sequences(self, n: int, mean: int, sd: int): + self.sequences: dict[str,str] = generate_sequences(n, mean, sd) def read_fasta(self, input_file): - self.sequences = read_in_fasta(input_file) + self.sequences: dict[str,str] = read_in_fasta(input_file) - def run_sequencing(self, read_length): - self.reads = simulate_sequencing(self.sequences, read_length) + def run_sequencing(self, read_length: int): + self.reads: dict[str,str] = simulate_sequencing(self.sequences, read_length) - def write_fasta(self, output_file_path): + def write_fasta(self, output_file_path: str): write_fasta(self.reads, output_file_path)