Skip to content
Snippets Groups Projects
Commit a319197f authored by Christoph Harmel's avatar Christoph Harmel
Browse files

Merge branch 'read_sequencer_class' into 'main'

feat: added read_sequencer class, updated main() in cli.py accordingly

See merge request !15
parents d75827a3 200b8c6c
No related branches found
No related tags found
1 merge request: !15 feat: added read_sequencer class, updated main() in cli.py accordingly
import argparse
from modules import run_read_sequencer
from modules import read_sequencer as rs
parser = argparse.ArgumentParser(prog='read_sequencer',
description='Simulates sequencing of a DNA sequences specified by an FASTA file.')
description='Simulates sequencing of DNA sequences specified by an FASTA file.')
parser.add_argument('--input_file_path',
help='path to FASTA file')
parser.add_argument('--output_file_path',
......@@ -13,10 +13,11 @@ parser.add_argument('--read_length',
args = parser.parse_args()
def main():
    """Run the read-sequencing pipeline once using the parsed CLI arguments.

    Loads the sequences from ``args.input_file_path``, simulates sequencing
    with ``args.read_length`` and writes the resulting reads to
    ``args.output_file_path``.
    """
    # The former run_read_sequencer(...) call performed the very same
    # read -> sequence -> write pipeline; keeping both ran everything twice
    # and wrote the output file two times.
    sequencer = rs()
    sequencer.read_fasta(args.input_file_path)
    sequencer.run_sequencing(args.read_length)
    sequencer.write_fasta(args.output_file_path)


if __name__ == '__main__':
    main()
1|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 481 bp
tgagcactcggtgccaagggcggggatacacagatggttggctgatacaaccgggactta
aattccctagactagatctgtgttggaacgcctctctacgagaaggcgaacgaactggcg
ccgaggcgatcgctaacatcttcgtctcgcttgaaccacacaatggatgattcctcccta
ggggtttgacaatcaacctggatagcgtttaatatagatggctggttgatttgtaaggcc
ttcacagactactcagagcaataagtgaccccccaacaatcagaggctgatcctctgctc
2|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 495 bp
ctgaatcaggtgtaggttctttttacgtcgtttaaggagctacacggtatcttgttttca
gttaaggtgccacacccccgggtggatcatccgtcagctttcctacaattaggtaactgg
cgggatcatttagtcttgtattaagacgctcgcgcccggggcggccggcttgtttgtgga
gagaaacaacaagtctgagtatagattaaatacaactggtttactggcaagtcagcgcgt
aacaaccggtgagccgctgcgcatgcttactgcaatgaacatcttggcacgatcctgcga
3|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 193 bp
acttcagtactggaaggatctaggaaccattaatgcgagtgtggtgacgccagacgaccc
ccggtgttctgccaccttctttggataggagaaccgtcactcgccccggaggccccacgg
ataagaagggtatcttgtgatcacgcgaatgactcacttgcgtaagtaatctaactttgt
ttttcgctataaaAAGCCGGGGTGTTGAAGTCTATCGTCGATGGCTGCAGTATTAAAGAT
CTTATGACTAATGCTCCAGCTTCCCCTCTGTGGTTAGAGACGAACTCGTCAGCCTCGATT
4|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 625 bp
acgtctggagcgtgggttgacccctgtacatggttctttccggatccttaacgtgccgat
acaactcaaaggtaactgtgcttaccacttccgaagctacatgcctctaacaaagtactt
tcgaggaggcactcaacccccggagatgctttgcgcggaagcagagatcgctgctcaaaa
tttggaatcactttcgtgcgagacccaaacaatttatggtggattcaagcgaacgagtca
tgattacagatctatcaatcgaggagaggacggcttcgccgtttccttttaatgtgaaac
5|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 845 bp
agagcgtacggcgcgcatcgtataccctacgagggcggcgtgtggaggaacgctgggctg
acactgtagaagattagatacacttgtccctaaaattaacccttaaccgctattagccgt
gaacgcttcctaatatttcaagccgtatagctaagtggagaatgtggagccctggtcaaa
tcacgagccaattagccctagacggacagcacatctcgtcgcgttaagcggaacactcag
cttttattacctagtgctcagcctggtttccatatgctctaaccgaactgatgcatactt
6|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 703 bp
tgcagtcgatgtgctattcgttttaggcagtctacgcgcttagtaactcccacggccata
gacttatctcagacatggaccatgtcgatatcggacgccgtcttaccacatttttcatag
cccttcataaggcagcgtgctcttactgcccaataaggtggacgattccgaccctaggcg
aaccagcgctatagatggaccttctaattgatgcgcaacgtgattgtttccttggtctgg
gttagcatttcggtagcctaacagtcactccagttcgctaactggcctggatgagggccc
7|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 243 bp
actctttagaatgggtttcactaatagtacgtgcatacaatttcgtcagaaagggcgctt
gctaagggacacggatcaatgatgaccagacttatggtgtcaggtctcactatattacat
atccggaacccgtgcccgcaccacgcgctgggtctaggcgaccggtgcatcatctccgcg
tctctagaggattctctcggtaaatgctgaattgcgtgagatcaaatccgtatgccagtc
atgAGGCAAGGCGTATAGATCTTTCCTCAGCATGAACCGGAACATCCTACAATCGCACGC
8|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 863 bp
attggcccggtccaggacagagccttatattgctactggtatgagaaccgttctgacgta
aacttgatggctttacgcctgcacgggcttcatacacacatgaccgtggacaaagtcgcc
caggccctcgaatagggtgtaatggttaacggttagtgccaccccaatgggtgcgaggca
gtaagagtgtcctatggcaaaactctcctcgtttcagaagggtcgctcctctagcctcct
tatcccccctataatagtactcgccgggtacgagccggagctccctcgagaagtcatcct
9|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 494 bp
aagcgaaactcctagaacttcccatcaggcaatcgtgtcccacgaagcacggatactacg
ggcactagttgaatggggggtttttttcgtaggtcgtaataggtactcggatagtcggcc
cagagttatgcttaagaatgcgctgcttaattcaatgtgactgccgttgtctccgatcag
atccaggtgatgattgcgatcgcagcgacatatgtctcgaaagacgtgtcgtgaataagc
ctgtaagcccaatgcaacatggttccctcaccttgtagctgatgtaccgtgtttcaatct
10|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 86 bp
atcctagcgccaaagatttactgttatggggtcgacgaacactagccgataatgccgtcc
tgggatctctagcctagtattatgcgGACGTTTCCAGCCCCGCTGACCCTGAGTCGGACG
TGAAAGCGAATCATCATATAAAGCATACACTGACTTGCACAAGTTGAAATAAGAGGTTCG
CTTAGGCTTGCCTCTCTAGTGGCGCAGCAGTACTGAGTGGGTTCTACTTACTCTCTGGAC
TCATGATCGTGTACACCGTGAAAGACGGCCCATGTGCCGTATATCTACCGTGCATAACCT
11|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 360 bp
cgcctgagggtcctaaatctgacgtatgatcgaagagattggaaggtcccggcgggtcac
cccacgttgcgatcatggccaaggccatggtttgctcaaaaatcccacattcgccgtctt
acgcgttaggacctcactatcccacagacggtgcgttaccttgtagttgacgcgggatcg
tggtgataacagctatttccgagacttcatattcttttacatagcggcttaccgtagtga
ctccatacattatttgcctattttgtagtgccccgaacagtaaggggaagccaactgccg
12|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 140 bp
gaattcctggggatttactcacccccgaggcggacaagatttccagctggatcaccgagg
gttacttaatcccttcgatgctttcaaaggccctaatcagtattgagcaacgaaagcgga
gtcgttagtgtccaagttgcAAATGGTATCGCAGAGCGGTCGGATTCGCCAAAAAGCTTG
GCCAATGGCTAAAATTTAGCTCTCGCCCTCGCGGACATTGTGGACATGTGCCTGACTACC
CATCATTCTCGCTCGGCACTAGGCCACCAGGGTTAACGATTTCGTAACAACGACCCGCAT
13|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 832 bp
aatactctcgttgaagcgtcggacagtaaagtgagagatttcggcccacggtagtcggac
attctcagtggggagcgaagagttgcgcttagagccgacgtacacgatataacctcaatt
gaaaatcgctatgtgcatcgttagggcctccggcgtgctgtttcggcagctgagtgtgag
ggtataacttaccttcgacccgaattgtctcgcggaaatcctaggcaagtaatccacttt
tggtacgggggagctagttcctctaagacgaacaagtgcactcttcacgtatagtgccct
14|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 296 bp
atcggggtgcgaaatcccctgagctggttgactacatacgtaaccacgttccgtgcgtca
tctaagcgtatcggctcatactggtggtaactagacttggtgaaccctaggtgccggcat
atcgaggtccgcatccaaaataactatcgctatagctacatagacatttactcgcaatat
tacacgaaccgtacgtccctcggtattaacgtaatggttaaagtctctaattccgctgca
gagcggcgggataaagacgccggtgtggcctgaatggtggatctgtccgtagtaccACGT
15|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 515 bp
accttcaatttgttcgcccgggacaagtagaaattactgtaaactaaacttaacctattc
cttgttaaagtccgcaccaagtgtactgtaagaatggtcgctcgtaataataacgagaag
atcctcgagccgtggtctgctgcaactaccttgagcggtacatcgatgtcccactctggg
cggggatcaggggcgagacttgtggtgaggccaaagaatggcgcatatgtaggcaccata
cgtcgatacgttccaggagtagaggcctcgaacatacaccacgataagtctacagacgca
16|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 820 bp
ccggctcaatcctgtagaaccgcgtacaacacacccaagctataccgcacacggcgcctt
agcaaccactgcttatctgcgtattatacctttacaatcattacatttgatctatctgtg
taccggttttttttgattcaattcgctggattacgacctcccggccaaaaattctcaatt
catcgttaacagacgtatttgaagataatcattcaacgtgaactagcacttggtcacttg
gtacgccaaccaagctgtgctttggggcaaccctttataactcacatgccgtcctaggac
17|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 791 bp
attgttagggcctgtccggaaaagatcaacggaagatattcaccagcacctatgctgact
cacgtagttcccgacgttcagtcccctccaacgtggaaggtaggacccatctccttaacg
ggatcgatcggtcttcctgtgaaagttgctcagagtcctcaaggacgtttttgggtgcgt
gtacggtatggttatggtacgtgtctgtgacagagggtattcttactggttaagtgaccc
atatgaccacctgacgcccgagcatagacctgtaggggtcgacgcgagagatggcagctt
18|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 328 bp
accgattacaggcagtcggccttgtccgctcgtatatccagggatgttccaccgaaagtg
ggagtgtggcacttattggtaaaaggcatttttacgaacgacactgataggattgatcac
tcaagaaatgttctcgaccctgaggtaggagtcttaacagacggacatcctccgtagata
cgtgagaattaagggacgcatgtcgaaaacgcttggaatctactgtagtggcccacctta
cgcttcttccaataactcccttcatagtccggcaacctcggtgggggtttcccttaggcc
19|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp
ggagtggaaaattctgtagtccgttggcggcgaccgcaaaccagaataatatggtcacgt
taggccctcgggccccttcatatgtacggagtcattgaattagcattatactaccgttac
gcaagaccctatcccatccgcgactgtcaccactgctgtaaggttgcaaggctgtttcaa
tgtaaagtaggcgaattctgacgtgggctgataacgaatcccccgggttatctagtgcaa
gtgctatccCAGCAGTAGCTGTTGACTCGGAGGCGTCAGAACTTCCCTCTAGTGTGGCAA
20|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 440 bp
atcttaaacagcccaatcggctcgccgaccaatttcccgcttcacagtacgcggaagaat
ctgcagatagaagtcagccctctcacgtcaataggaatgctgcccgtcatgtttaactac
tcaagttttaaggtgtcccttatcggttccaggatcatgtctgaaggaagatggtcgcaa
cgaaatctggagtggcatacatcgttcggtcgaagcataatctcagacgttatctataaa
gttagggcgctgtatggattgggattcaagctcgaagcctgttcctgccatacagcgcct
21|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 840 bp
cataactcgtgagtggccctgtacaagtcattgcatcacaatccttgcaatttgctcctt
tggccaagcgtacaagaccccggacccatacgctcccggctgataaactgctacagcatg
gtatatccggatgatgcccctgaaaactgcggaagtcaatttgttgatgaatccccgact
ttccgctgttcctgtggatggtcgaatgccaaatgaagagctgctccccccttctttaat
atcaagcactacaaagataaagcctgtttggctgacggcgagccctcccctatcgtacgc
22|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 234 bp
caccgcgaaagtgactcagttttcccggtcttatcacggtcgttgtcgtccagattccgg
ttgttaactgcgggagctataacacttattccttactgcgacggctgatccactaagaac
agttcatagagctcggctatataatttgaagacatagattccacggtacttgtagcccat
aaccgctgaggaggaacgtccaacggttcgcgcggagcatgtgacgcttaaaggGATGTA
AACAAAAGTTTCCAGGCGGCGGCGGTGTAGGCCGTTGCACTGTACTAGGCACAGCTACTG
23|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 917 bp
atcaagtgattacctggtaacccgccgctcttgcagtgttcaccctttgtgtcgtcttag
tgtttgtacacgttaaggaaaagcgttagcttaaccattacgccccccaaagcccggtgt
gtagttatctacatgccgtgtcaaagcggtgactaaatgtttatcaagttctgatgacaa
cgtgagctcttaaagccattgactagtataagcacggaacaatgataccaggcaagcttg
aatataggataaggcctctaagctcgaagcggatcttacggaggtgtgaatcaacagcac
24|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 676 bp
cacacggcatcgcaaagcgagctatccagagatgatacatgtggttgaaggtgattgcgt
caacatgggggttgctcagtttggttggtcaatcaacggtggcagaccatgcgataacga
tgatggtaagactgtaaggtaagttaaatactctcgtctgccagttgggtcgtcaacgct
gcagagacgccattcttcccagaaggtccgagctttctacagtgccgcggcgtcatgacc
aaaggggtccaacctcgcagtaaaatgtctatgcttctggtttggaatgagaccgggcca
25|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 870 bp
ctgatcaccaatagcttgcgcttaacacacgcgccttacaattatatgacgcccttgcca
atgacagatagagccattaatcgtggaaaccaggcatttatacttgtccgatgtatcgat
tctcctctatctacagagcccggacatgcgaaatatcaaaattccatgtatactgaataa
atacattgggcaagccgggctcatgcagcaatcccagcgttgccttacgcaaagatatct
tacggagttgcctttagattaacagcacgtgttcaaaaacctagccaactctgtcggtct
26|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 751 bp
gggtgcgttatggggactaaagactgttactaccggtactccgccttatagagccgtcac
gtattaatcagctatcaacagatactatcgtcacagccctccttctggcgaaggatctga
gcatttgcaaagctataagttggtacgcaacggtagagggcttcgtagtcggggaaaggg
cttgcagtagtataggccgtaacttatctgttgcaacctcaaccgcacgaatcgattact
ctataactgccctcaatacagtatggttaccagtcaccttcacactgaagattaattcgc
27|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 574 bp
gtactgcaccttgcactgctatctacaatgccgagggtcgccctagtgctttgcatgttt
ggcctctacctacgagtctacgcgggcgtttttaagcaagctacgatcatcttgatccaa
gggtacgaggccccgcagaccaatggaggtcgtgaccaccctcgtgtatgcctcgcacta
agcgagcattctggtatactgtctctctcctgtgataataacagtcggctcgatattcag
ttcacatgaaacagtatgttatataggtgggatggttataacacggaaaggtgaaaaaga
28|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 169 bp
agctccctaaacaacacccgcgtaaaaccttcagttatggtgccgactaaccctgtggat
gtcttagcgctctcgttccgatgggtgctgatactagtaaatgagactcgagaccgagaa
cacgcaacggctacaacctggtcggttgttggggtttttataatcagtgTACGAAATAGT
AGACCTCGCCCTGTAGTTGGAAATTACCGTCATTGGTCTATACAAAGCGCGTGCTTCAGG
ACTCGGTGCCGAATCACTCCGTGCCAAATTAACGAAAGCTTCTGATGTGGATATGAGTTC
29|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 408 bp
tgcagtgatgcatcgataagaccgcatagttacctccttacaggtgacgctaggctaatt
gggagtgctggcacttgtgccctacagtcaagcgctcacgcggtgttctcctcccgcaat
cttagatattaggctctgtaccgcacgaaggatgaattttcttgactattggtccctgtt
tacgagggcttacctagagtgaggatgaacataaacaaggcctacttgacttaaggcttc
caaatcacttgagggcaaatgactcctcaaacgcgagtgccagtactatccgtgagggaa
30|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 52 bp
caaagcgattcgggttaacgcacttaagagttcgacgtaggttagtcccctcTGAAGCTG
GCCATAACAGGCAAGTTATTGATTGGACCTTACGACGTACATGGCATTGCGAGACACGGA
AGCTTGAGCACTCATAGATCTGCCAGTCCCAACGCAAGTTGTTTTTCCCGCATCATTCAG
AAGAAGAAGACGGATTGAACCAAACTGTCATATCAGTGGTTAGCTGCTGTAAATTCAGCG
GATTGAGGGATGGGTTTTTCGGAACATTATCTTAGCGGGAACAATAAATGCGAAAACCAG
31|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 581 bp
tgctctgacgtgtaagcgccttcgataacgtctttgcagcgccccacaaagtaaggaccg
gtctaacagggcttccgaatcaatagactgatagtaatgggatcctgaggctgggacccg
acacacggcatattttactagaaacgctgatttaaactccaattatccttgacgcactga
gccacagtcttagacgcagaatgtccgcaggagccctgtctttcccctaaatcattcgcg
gcatttgtttacgggttaagtcctgcggatcctagagtctgggccccgtacaaccaggaa
32|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp
gcggaactacctctctaagaccgcacaacaagtgtagtagatgaagatcacgcagagtgc
tcggcactgcatttttatacgtcgaatcagaaacgaggttcctcctctaggcttgttaaa
aatccgggcgcgatgggctggtaatctgtggccatgggagcctcgccatttaaagatttt
ggttaaggctcctctgttgtgtccatcacccttgaacgagcccgtacaaaccgtgtacga
tgttgacacTGCAGAGAGGGCGTTTCGTACCATAACATGCATCCTAGGCGGTCATATTGT
33|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 297 bp
gccctcgcgccagcttacttttagaaaacatcgaccggtaagagatacctgggtgagctg
ggcttcacgacatgttcttaaatcaatactctaaatctgctttgtagcatgcctcaagta
aaaaaatgtgctggttccgcacaggtgtgacgattaacgttgcgcccgtttgcgtcagtc
cagatcaccgatcttccacaccaccggtgggctgccggactgcaggtaatgactcctggc
tgcattctctgacataaaggttgaatagaacggcgtccttgagaaggttatggaacgAAA
34|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 573 bp
gcctaggggtcttgaccacagggagtacgagcattgatcattggagcaggtggctaatat
tgatagtggttagaccaccggcgcatcatcgtacgagcgcgggcgatacgtgtctttcac
cggcgcactaatcttatcttacttctcaagccccgacagcatgtacgccaagtgttgttc
tgatgaaactttcgaaatagcaactgttagtcagttatagttggggagggcagtgaatac
ctcaaatacacccaagaaataacttcgaagcggcgcctatatcacacccctgtttcttat
35|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 559 bp
gaaaaagtcgccccattcagttacaatcgtcttcagaagccagctcggttggggctatct
gcggggtaatgcaacagggggctaccagacggtaaaccagggtcttgctattggtgttac
gaaacaaaggagctatgcgacctcattagatcgagattactctcacaggcagctccggcc
atagcacaactaatttcgggtgtggagctcaccacaggaacatcttgtgcgtcctttgtt
atttaattgtgcattgtaatgcaccggaccccgggaacatacagccattatctgtgttgc
36|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 187 bp
ctaagtccttatctatgatgcatctttcgttactgcgacaatatccgagacgagcagagt
tacacgccgaggtgtaaacgaatacgattgctatatgcaacgagttggttacacgcgtga
aggcgaatgtggatgctgcacttggagtcccattttaccggccgcacgtgctagctcact
caccttgCAGATATACACAGCTCGGGGCTTATGTGAGGGCTTTCTGTAGATCGGGAGCTT
ACCAAAATATTATGGCAAGGACTCACACTTTGATATACGCTTCACAATACTAAGTCCAGT
37|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 549 bp
ttcatatggggatttggaatcgggtttgtgcggaatatgcccacgagactgcttatgtca
acgagacgacccattgtcacgttgtaaggccaccaataacacacaggtcttcgtttgctg
tctcagggcaatcgcatcgacaacatcgtatggataccgttttttatcagcttacggcgc
atcatactaataaggtgtttgagagggcgcagactcgaagcagtgtgatcttcccggttc
gaagatgcaaaaacggtcctatttcgatccaaaactcagcgcactagtccaatgcttttt
38|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 916 bp
gtggcctaccataaatcaatttgggttaacgctctttgatctacgcactatgttgattca
cttaccccttgtcaccgggcagaagagagccagtttaggtgtggttgtatttgccaaacc
gcaaaccgcctaatgagctggatccggccatggaattaatcccgtcgtttgactcgaggt
gttcaaagactgtgcaacacgacgtgcattcatcactagaacttaatctagaccaggcct
tgtggccaggagaggcgacgtgatattgccctatacacagataattatatacccctcgcg
39|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 848 bp
accccggtcgctttggccggtcgtagccctaatcaattctgttcgtatcactaaagtaac
ggtttgaaatcctttgcaaacttgatctgggtatatgaaccggtatgcggggatagtggt
aaataagtagtttacgagctgagcgtggattatcccagagaagttgccttaggtccagag
cccgcacctacaatcactcgaggccggtcgagcgttgcgtggcaaggaaacccagccggt
caccctaccctcaaactcacgtcattgatccaatcatacatggcgtctctcacggtggtg
40|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 289 bp
agagcaaaagaaagtctgctccgcgtgacacacttgctcgttgtagtaactgcacgcgcc
gtctactcgacagggaccccccgtcggttcctctctatagcaatcgcggaagtggttccc
tgcctcccgcgcagaagttcaaactagtaatccttaatgacttgtggggggggagatcag
tttcttccacaatggagtaaacttatgcgagaatcaagatcgcagaggccattttttgat
gatactgtcagatatgtggttagccgtatcacgttaccgacgcagaattTGGTGAAATAG
41|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 642 bp
ctggggaatgaccgtaccgatctaattccccgtcgaaaaacttatgacgcgcagttgtcc
ttatgcttgagacatgaatccttgccccatattggcgatcttggccaatgagatctgtcg
aaagtactggaggccggtaaattgggggctctagaggtccgcccctgaaggactaacgtg
tgtgtgtgtctacgtgtcgggttatcagcgtgttggacgatggccgtggattcaacgcat
gctagagagctaatgatcctccgaagtcaaaagcctcagtgcttcgatttatgagcgcgt
42|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 993 bp
gcaagaagccaaaaaccttgcaggaggtcatttaagtttacccgcgcataagcagagacg
gacctctctgagatctcgcaccgcgcgcccggccggcactatcgatgctagactagggtt
ggtgactagcccgtcaaaaccagcctaaacgcaaagattgtaggcgctagtccggaactg
actgcttcgtgtcggtgggagcctagtatgtttccgggtctatgacccctaaaatcatag
acgtgtcttaatagctatacctgacttactttgaagtacttgccacgacgagtttatgag
43|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 473 bp
ggttgtccaggcgcgagcaagtagctgactcgctaatcttaacgagtattgcttaggact
tccaaatactccaagacgtcaatacgctttatctttgtgaagtcatcccggaccgagcgc
ttgggtcgtgatttaaaatcccctgtgatgtggctacaggtgcggcctatacagccgaga
agaaggccgtctttaggcgtccaatgaaccgttacagggacacaccaaactgcgccaact
gatcccacgggtcacggtacgctctaagaccagtcgggattctgacttaacatcgcagca
44|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 272 bp
gtagaacttgttccccatggacaatgctagttccgttaatgccaggtattcatgtgccaa
gcgcctgcctggggaatacgagcctctctacaaacttacggccaccatgcttaaagattc
ggtgacttcactaatgacctatacaagtaatgcggaggacgctgtcgcttattgctcttt
gctaaggccagttatgtccgtcagtcaacgatacgctgcggcggtgggtgacggcactag
accggaagcctgatgacaagttcgaatcaataGTCGCTTCCATTACACTTGCGCTATTCC
45|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 860 bp
aaaagcatcactctaacgacgctaccgtctgaatagatcaagattgctatcggttcgacc
ttgatcgcatgtgaacccgcccaaaaacccgtctcgacaaaagttacgtcgcatgggctg
cgccaccggatagctcctagcttatcttataaatcaggtagagctacaacatggtgctat
gacaactggagtgtcatcgctttggcgaaaccgtaaagggtgggaattgctgcattctca
actgggccgaactattccgcattcggctgctcacaaatcgtggaatgtgtccttgaacgt
46|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 884 bp
aagcctctacaggctctgcggtttggctttacttaacggtgagtcaggaaaacattactg
ctacgttcaccgtgttcagagatagagagtacattagggaccaatcacaacgttcgccag
ggcaccgcctaatccgcgttgttagcaagagtacaggctctcgtatactttcagaccctt
caatactagacgacaaattgcagcccggggtcatcggtcgactcagatacgtgctaacga
gtaccaggtctaccgttgcaacgttggatgcgttatactcggcataaggcgatgcccttt
47|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 888 bp
gcgccttgaagaggcgaggtctaaaggcaaaaatttagatccgccctatgagacggccga
cgcggagaattccctaaccactattgtcctctgcatcgatatcaggaataggcttacctg
caatctcttatggtgatagactgtttgggagctgaacctgagacgcgcacgaaatttgga
aggatcaaataggccccgcagtctctggtagacttctgccgagcggactagcttggctaa
ggtgtacaagcctaaatcgtttttcacatcaattttatagctgattatagaggaacgacg
48|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 588 bp
catcaagatgggttacgtaggaccgagattcagtctctgggttagagccgacagcggggc
cgctacatagtacacggcgaggaatgcggggttgggctgaaccgtacacagtgggctagc
tgcggtacctgccaccggcatgcgtttaaatcctttcctttggcgaagccaactgccgac
gtccgcaacagagactcgttttccgaccccgttactaaatcagctaactggcgcctgaat
cctcttacgtcggatgttaattagtgtatagaatatcggagggttgagtgcgacgcgctt
49|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 626 bp
taacctcagtctcgttcccccctcggtagttcggacccttattcgcttatctcacattca
tcactgtagaccaaggaccgggcatacttgcggatatctaccaggactaggcacttaggg
atacgctgttgaatacgggtttcgtcccgtgtactcaagtgtagtttaagataggtacga
gtgctagtacatcgtacaatttacaactgacttaaacgagagtttattatgtcttgttca
cttgttgacacgcctgggaaaataataaaaggcaacgtctaatctcagacccgttgatta
50|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 214 bp
taactgtcggtcactgctcatcccgactagttcggctcactagacttactcgcggaagcg
agaagtaggacgtcgtgtaatactccaacgtcgttacgcaatgttgtaaaacttcatcgc
attccgtgcatggcctaaacgtgcagcattatataacgctctttggtcttaatatccatc
gcgggagtaacgcgaaggggagacgtgtgcctgaACGAACGCTAAACTAGGTACTAAGTC
GTGAAGCTCGGGTGGAGACAGGTAAACTGATCGCAACGTATCAACCAATTCTGGACCCTA
import random
def generate_sequences(n, mean, sd):
    """
    Generates random sequences.

    Args:
        n (int): Amount of sequences to generate.
        mean (int): mean length of sequence (gaussian distribution).
        sd (float): standard deviation of length of sequence (gaussian distribution).

    Returns:
        dict: maps each index in range(n) to a random sequence built from
            the letters A, T, C and G.
    """
    nucleotides = ("A", "T", "C", "G")
    sequences = {}
    for index in range(n):
        # A negative draw gives an empty range and therefore an empty sequence.
        length = round(random.gauss(mean, sd))
        # join() avoids rebuilding the string on every appended base.
        sequences[index] = "".join(
            random.choice(nucleotides) for _ in range(length)
        )
    return sequences
def generate_sequences(n, mean, sd):
    """
    Generates random sequences.

    Args:
        n (int): Amount of sequences to generate.
        mean (int): mean length of sequence (gaussian distribution).
        sd (float): standard deviation of length of sequence (gaussian distribution).

    Returns:
        dict: maps each index in range(n) to a random sequence built from
            the letters A, T, C and G.
    """
    # Imported locally so the function does not rely on module-level imports.
    from random import choice, gauss

    nucleotides = ("A", "T", "C", "G")
    sequences = {}  # deliberately not named `dict`, which would shadow the builtin
    for index in range(n):
        # abs() folds a negative draw back into a usable, non-negative length.
        length = abs(round(gauss(mean, sd)))
        # join() avoids rebuilding the string on every appended base.
        sequences[index] = "".join(choice(nucleotides) for _ in range(length))
    return sequences
def read_in_fasta(file_path):
'''
......@@ -42,7 +65,7 @@ def read_sequence(seq, read_length):
if read_length >= len(seq):
for nt in range(len(seq)):
sequenced += seq[nt]
for nt in range(len(seq),read_length):
for nt in range(len(seq), read_length):
sequenced += choice(bases)
else:
for nt in range(read_length):
......@@ -63,7 +86,7 @@ def simulate_sequencing(sequences, read_length):
"""
results = {}
for index, key in enumerate(sequences):
results[key] = read_sequence(sequences[key],read_length=read_length)
results[key] = read_sequence(sequences[key], read_length=read_length)
return results
......@@ -107,7 +130,19 @@ def write_fasta(sequences, file_path):
outfile.write("\n".join(wrap(value, 60)))
outfile.write("\n")
def run_read_sequencer(input_file_path, read_length, output_file_path):
    """
    Runs the whole pipeline in one call: read FASTA, simulate sequencing, write FASTA.

    Args:
        input_file_path: forwarded to read_in_fasta.
        read_length: forwarded to simulate_sequencing.
        output_file_path: forwarded to write_fasta.
    """
    # Evaluated inside-out: read the input, sequence it, then write the reads.
    write_fasta(
        simulate_sequencing(read_in_fasta(input_file_path), read_length),
        output_file_path,
    )
class read_sequencer:
    """Holds input sequences and simulated reads between pipeline steps.

    Each method delegates to the module-level helper of the matching purpose
    and stores the result on the instance.
    """

    def __init__(self):
        # sequences: input sequences to be sequenced
        # reads: result of the most recent run_sequencing() call
        self.sequences, self.reads = {}, {}

    def add_random_sequences(self, n, mean, sd):
        """Replace the stored sequences with the output of generate_sequences."""
        generated = generate_sequences(n, mean, sd)
        self.sequences = generated

    def read_fasta(self, input_file):
        """Replace the stored sequences with the output of read_in_fasta."""
        loaded = read_in_fasta(input_file)
        self.sequences = loaded

    def run_sequencing(self, read_length):
        """Run simulate_sequencing on the stored sequences and keep the reads."""
        simulated = simulate_sequencing(self.sequences, read_length)
        self.reads = simulated

    def write_fasta(self, output_file_path):
        """Pass the stored reads to the module-level write_fasta helper."""
        # Inside a method the bare name resolves to the module-level function,
        # not to this method.
        write_fasta(self.reads, output_file_path)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment