Skip to content
Snippets Groups Projects
Commit 5001fde3 authored by Christoph Harmel
Browse files

feat: added random sequence generator to cli

parent d712ba97
No related branches found
No related tags found
1 merge request: !21 feat: added random sequence generator to cli
import argparse
from modules import read_sequencer as rs
from modules import ReadSequencer
import logging
parser = argparse.ArgumentParser(prog='read_sequencer',
......@@ -11,21 +11,28 @@ parser.add_argument('--output_file_path',
parser.add_argument('--read_length',
help='read length for sequencing',
type=int)
parser.add_argument('--random', action='store_true', default=False,
help='generate random sequences')
parser.add_argument('--n_random', default=100, type=int, help='n random sequences')
parser.add_argument('--mean_random', default=50, type=int, help='mean random sequences')
parser.add_argument('--sd_random', default=25, type=int, help='standard deviation random sequences')
args = parser.parse_args()
def main():
LOG.info("Program started.")
read_sequencer = rs()
read_sequencer.read_fasta(args.input_file_path)
LOG.info("Read sequencer started.")
read_sequencer = ReadSequencer()
if args.random:
read_sequencer.add_random_sequences(n=args.n_random, mean=args.mean_random, sd=args.sd_random)
else:
read_sequencer.read_fasta(args.input_file_path)
read_sequencer.run_sequencing(args.read_length)
read_sequencer.write_fasta(args.output_file_path)
LOG.info("Program finished.")
LOG.info("Read sequencer finished.")
if __name__ == '__main__':
logging.basicConfig(
format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")',
level=logging.INFO,
)
level=logging.INFO)
LOG = logging.getLogger(__name__)
main()
1|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 481 bp
tgagcactcggtgccaagggcggggatacacagatggttggctgatacaaccgggactta
aattccctagactagatctgtgttggaacgcctctctacgagaaggcgaacgaactggcg
ccgaggcgatcgctaacatcttcgtctcgcttgaaccacacaatggatgattcctcccta
ggggtttgacaatcaacctggatagcgtttaatatagatggctggttgatttgtaaggcc
ttcacagactactcagagcaataagtgaccccccaacaatcagaggctgatcctctgctc
tgagcactcggtgccaagggcggggatacacagatggttggctgatacaa
2|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 495 bp
ctgaatcaggtgtaggttctttttacgtcgtttaaggagctacacggtatcttgttttca
gttaaggtgccacacccccgggtggatcatccgtcagctttcctacaattaggtaactgg
cgggatcatttagtcttgtattaagacgctcgcgcccggggcggccggcttgtttgtgga
gagaaacaacaagtctgagtatagattaaatacaactggtttactggcaagtcagcgcgt
aacaaccggtgagccgctgcgcatgcttactgcaatgaacatcttggcacgatcctgcga
ctgaatcaggtgtaggttctttttacgtcgtttaaggagctacacggtat
3|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 193 bp
acttcagtactggaaggatctaggaaccattaatgcgagtgtggtgacgccagacgaccc
ccggtgttctgccaccttctttggataggagaaccgtcactcgccccggaggccccacgg
ataagaagggtatcttgtgatcacgcgaatgactcacttgcgtaagtaatctaactttgt
ttttcgctataaaAAGCCGGGGTGTTGAAGTCTATCGTCGATGGCTGCAGTATTAAAGAT
CTTATGACTAATGCTCCAGCTTCCCCTCTGTGGTTAGAGACGAACTCGTCAGCCTCGATT
acttcagtactggaaggatctaggaaccattaatgcgagtgtggtgacgc
4|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 625 bp
acgtctggagcgtgggttgacccctgtacatggttctttccggatccttaacgtgccgat
acaactcaaaggtaactgtgcttaccacttccgaagctacatgcctctaacaaagtactt
tcgaggaggcactcaacccccggagatgctttgcgcggaagcagagatcgctgctcaaaa
tttggaatcactttcgtgcgagacccaaacaatttatggtggattcaagcgaacgagtca
tgattacagatctatcaatcgaggagaggacggcttcgccgtttccttttaatgtgaaac
acgtctggagcgtgggttgacccctgtacatggttctttccggatcctta
5|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 845 bp
agagcgtacggcgcgcatcgtataccctacgagggcggcgtgtggaggaacgctgggctg
acactgtagaagattagatacacttgtccctaaaattaacccttaaccgctattagccgt
gaacgcttcctaatatttcaagccgtatagctaagtggagaatgtggagccctggtcaaa
tcacgagccaattagccctagacggacagcacatctcgtcgcgttaagcggaacactcag
cttttattacctagtgctcagcctggtttccatatgctctaaccgaactgatgcatactt
agagcgtacggcgcgcatcgtataccctacgagggcggcgtgtggaggaa
6|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 703 bp
tgcagtcgatgtgctattcgttttaggcagtctacgcgcttagtaactcccacggccata
gacttatctcagacatggaccatgtcgatatcggacgccgtcttaccacatttttcatag
cccttcataaggcagcgtgctcttactgcccaataaggtggacgattccgaccctaggcg
aaccagcgctatagatggaccttctaattgatgcgcaacgtgattgtttccttggtctgg
gttagcatttcggtagcctaacagtcactccagttcgctaactggcctggatgagggccc
tgcagtcgatgtgctattcgttttaggcagtctacgcgcttagtaactcc
7|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 243 bp
actctttagaatgggtttcactaatagtacgtgcatacaatttcgtcagaaagggcgctt
gctaagggacacggatcaatgatgaccagacttatggtgtcaggtctcactatattacat
atccggaacccgtgcccgcaccacgcgctgggtctaggcgaccggtgcatcatctccgcg
tctctagaggattctctcggtaaatgctgaattgcgtgagatcaaatccgtatgccagtc
atgAGGCAAGGCGTATAGATCTTTCCTCAGCATGAACCGGAACATCCTACAATCGCACGC
actctttagaatgggtttcactaatagtacgtgcatacaatttcgtcaga
8|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 863 bp
attggcccggtccaggacagagccttatattgctactggtatgagaaccgttctgacgta
aacttgatggctttacgcctgcacgggcttcatacacacatgaccgtggacaaagtcgcc
caggccctcgaatagggtgtaatggttaacggttagtgccaccccaatgggtgcgaggca
gtaagagtgtcctatggcaaaactctcctcgtttcagaagggtcgctcctctagcctcct
tatcccccctataatagtactcgccgggtacgagccggagctccctcgagaagtcatcct
attggcccggtccaggacagagccttatattgctactggtatgagaaccg
9|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 494 bp
aagcgaaactcctagaacttcccatcaggcaatcgtgtcccacgaagcacggatactacg
ggcactagttgaatggggggtttttttcgtaggtcgtaataggtactcggatagtcggcc
cagagttatgcttaagaatgcgctgcttaattcaatgtgactgccgttgtctccgatcag
atccaggtgatgattgcgatcgcagcgacatatgtctcgaaagacgtgtcgtgaataagc
ctgtaagcccaatgcaacatggttccctcaccttgtagctgatgtaccgtgtttcaatct
aagcgaaactcctagaacttcccatcaggcaatcgtgtcccacgaagcac
10|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 86 bp
atcctagcgccaaagatttactgttatggggtcgacgaacactagccgataatgccgtcc
tgggatctctagcctagtattatgcgGACGTTTCCAGCCCCGCTGACCCTGAGTCGGACG
TGAAAGCGAATCATCATATAAAGCATACACTGACTTGCACAAGTTGAAATAAGAGGTTCG
CTTAGGCTTGCCTCTCTAGTGGCGCAGCAGTACTGAGTGGGTTCTACTTACTCTCTGGAC
TCATGATCGTGTACACCGTGAAAGACGGCCCATGTGCCGTATATCTACCGTGCATAACCT
atcctagcgccaaagatttactgttatggggtcgacgaacactagccgat
11|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 360 bp
cgcctgagggtcctaaatctgacgtatgatcgaagagattggaaggtcccggcgggtcac
cccacgttgcgatcatggccaaggccatggtttgctcaaaaatcccacattcgccgtctt
acgcgttaggacctcactatcccacagacggtgcgttaccttgtagttgacgcgggatcg
tggtgataacagctatttccgagacttcatattcttttacatagcggcttaccgtagtga
ctccatacattatttgcctattttgtagtgccccgaacagtaaggggaagccaactgccg
cgcctgagggtcctaaatctgacgtatgatcgaagagattggaaggtccc
12|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 140 bp
gaattcctggggatttactcacccccgaggcggacaagatttccagctggatcaccgagg
gttacttaatcccttcgatgctttcaaaggccctaatcagtattgagcaacgaaagcgga
gtcgttagtgtccaagttgcAAATGGTATCGCAGAGCGGTCGGATTCGCCAAAAAGCTTG
GCCAATGGCTAAAATTTAGCTCTCGCCCTCGCGGACATTGTGGACATGTGCCTGACTACC
CATCATTCTCGCTCGGCACTAGGCCACCAGGGTTAACGATTTCGTAACAACGACCCGCAT
gaattcctggggatttactcacccccgaggcggacaagatttccagctgg
13|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 832 bp
aatactctcgttgaagcgtcggacagtaaagtgagagatttcggcccacggtagtcggac
attctcagtggggagcgaagagttgcgcttagagccgacgtacacgatataacctcaatt
gaaaatcgctatgtgcatcgttagggcctccggcgtgctgtttcggcagctgagtgtgag
ggtataacttaccttcgacccgaattgtctcgcggaaatcctaggcaagtaatccacttt
tggtacgggggagctagttcctctaagacgaacaagtgcactcttcacgtatagtgccct
aatactctcgttgaagcgtcggacagtaaagtgagagatttcggcccacg
14|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 296 bp
atcggggtgcgaaatcccctgagctggttgactacatacgtaaccacgttccgtgcgtca
tctaagcgtatcggctcatactggtggtaactagacttggtgaaccctaggtgccggcat
atcgaggtccgcatccaaaataactatcgctatagctacatagacatttactcgcaatat
tacacgaaccgtacgtccctcggtattaacgtaatggttaaagtctctaattccgctgca
gagcggcgggataaagacgccggtgtggcctgaatggtggatctgtccgtagtaccACGT
atcggggtgcgaaatcccctgagctggttgactacatacgtaaccacgtt
15|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 515 bp
accttcaatttgttcgcccgggacaagtagaaattactgtaaactaaacttaacctattc
cttgttaaagtccgcaccaagtgtactgtaagaatggtcgctcgtaataataacgagaag
atcctcgagccgtggtctgctgcaactaccttgagcggtacatcgatgtcccactctggg
cggggatcaggggcgagacttgtggtgaggccaaagaatggcgcatatgtaggcaccata
cgtcgatacgttccaggagtagaggcctcgaacatacaccacgataagtctacagacgca
accttcaatttgttcgcccgggacaagtagaaattactgtaaactaaact
16|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 820 bp
ccggctcaatcctgtagaaccgcgtacaacacacccaagctataccgcacacggcgcctt
agcaaccactgcttatctgcgtattatacctttacaatcattacatttgatctatctgtg
taccggttttttttgattcaattcgctggattacgacctcccggccaaaaattctcaatt
catcgttaacagacgtatttgaagataatcattcaacgtgaactagcacttggtcacttg
gtacgccaaccaagctgtgctttggggcaaccctttataactcacatgccgtcctaggac
ccggctcaatcctgtagaaccgcgtacaacacacccaagctataccgcac
17|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 791 bp
attgttagggcctgtccggaaaagatcaacggaagatattcaccagcacctatgctgact
cacgtagttcccgacgttcagtcccctccaacgtggaaggtaggacccatctccttaacg
ggatcgatcggtcttcctgtgaaagttgctcagagtcctcaaggacgtttttgggtgcgt
gtacggtatggttatggtacgtgtctgtgacagagggtattcttactggttaagtgaccc
atatgaccacctgacgcccgagcatagacctgtaggggtcgacgcgagagatggcagctt
attgttagggcctgtccggaaaagatcaacggaagatattcaccagcacc
18|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 328 bp
accgattacaggcagtcggccttgtccgctcgtatatccagggatgttccaccgaaagtg
ggagtgtggcacttattggtaaaaggcatttttacgaacgacactgataggattgatcac
tcaagaaatgttctcgaccctgaggtaggagtcttaacagacggacatcctccgtagata
cgtgagaattaagggacgcatgtcgaaaacgcttggaatctactgtagtggcccacctta
cgcttcttccaataactcccttcatagtccggcaacctcggtgggggtttcccttaggcc
accgattacaggcagtcggccttgtccgctcgtatatccagggatgttcc
19|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp
ggagtggaaaattctgtagtccgttggcggcgaccgcaaaccagaataatatggtcacgt
taggccctcgggccccttcatatgtacggagtcattgaattagcattatactaccgttac
gcaagaccctatcccatccgcgactgtcaccactgctgtaaggttgcaaggctgtttcaa
tgtaaagtaggcgaattctgacgtgggctgataacgaatcccccgggttatctagtgcaa
gtgctatccCAGCAGTAGCTGTTGACTCGGAGGCGTCAGAACTTCCCTCTAGTGTGGCAA
ggagtggaaaattctgtagtccgttggcggcgaccgcaaaccagaataat
20|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 440 bp
atcttaaacagcccaatcggctcgccgaccaatttcccgcttcacagtacgcggaagaat
ctgcagatagaagtcagccctctcacgtcaataggaatgctgcccgtcatgtttaactac
tcaagttttaaggtgtcccttatcggttccaggatcatgtctgaaggaagatggtcgcaa
cgaaatctggagtggcatacatcgttcggtcgaagcataatctcagacgttatctataaa
gttagggcgctgtatggattgggattcaagctcgaagcctgttcctgccatacagcgcct
atcttaaacagcccaatcggctcgccgaccaatttcccgcttcacagtac
21|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 840 bp
cataactcgtgagtggccctgtacaagtcattgcatcacaatccttgcaatttgctcctt
tggccaagcgtacaagaccccggacccatacgctcccggctgataaactgctacagcatg
gtatatccggatgatgcccctgaaaactgcggaagtcaatttgttgatgaatccccgact
ttccgctgttcctgtggatggtcgaatgccaaatgaagagctgctccccccttctttaat
atcaagcactacaaagataaagcctgtttggctgacggcgagccctcccctatcgtacgc
cataactcgtgagtggccctgtacaagtcattgcatcacaatccttgcaa
22|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 234 bp
caccgcgaaagtgactcagttttcccggtcttatcacggtcgttgtcgtccagattccgg
ttgttaactgcgggagctataacacttattccttactgcgacggctgatccactaagaac
agttcatagagctcggctatataatttgaagacatagattccacggtacttgtagcccat
aaccgctgaggaggaacgtccaacggttcgcgcggagcatgtgacgcttaaaggGATGTA
AACAAAAGTTTCCAGGCGGCGGCGGTGTAGGCCGTTGCACTGTACTAGGCACAGCTACTG
caccgcgaaagtgactcagttttcccggtcttatcacggtcgttgtcgtc
23|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 917 bp
atcaagtgattacctggtaacccgccgctcttgcagtgttcaccctttgtgtcgtcttag
tgtttgtacacgttaaggaaaagcgttagcttaaccattacgccccccaaagcccggtgt
gtagttatctacatgccgtgtcaaagcggtgactaaatgtttatcaagttctgatgacaa
cgtgagctcttaaagccattgactagtataagcacggaacaatgataccaggcaagcttg
aatataggataaggcctctaagctcgaagcggatcttacggaggtgtgaatcaacagcac
atcaagtgattacctggtaacccgccgctcttgcagtgttcaccctttgt
24|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 676 bp
cacacggcatcgcaaagcgagctatccagagatgatacatgtggttgaaggtgattgcgt
caacatgggggttgctcagtttggttggtcaatcaacggtggcagaccatgcgataacga
tgatggtaagactgtaaggtaagttaaatactctcgtctgccagttgggtcgtcaacgct
gcagagacgccattcttcccagaaggtccgagctttctacagtgccgcggcgtcatgacc
aaaggggtccaacctcgcagtaaaatgtctatgcttctggtttggaatgagaccgggcca
cacacggcatcgcaaagcgagctatccagagatgatacatgtggttgaag
25|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 870 bp
ctgatcaccaatagcttgcgcttaacacacgcgccttacaattatatgacgcccttgcca
atgacagatagagccattaatcgtggaaaccaggcatttatacttgtccgatgtatcgat
tctcctctatctacagagcccggacatgcgaaatatcaaaattccatgtatactgaataa
atacattgggcaagccgggctcatgcagcaatcccagcgttgccttacgcaaagatatct
tacggagttgcctttagattaacagcacgtgttcaaaaacctagccaactctgtcggtct
ctgatcaccaatagcttgcgcttaacacacgcgccttacaattatatgac
26|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 751 bp
gggtgcgttatggggactaaagactgttactaccggtactccgccttatagagccgtcac
gtattaatcagctatcaacagatactatcgtcacagccctccttctggcgaaggatctga
gcatttgcaaagctataagttggtacgcaacggtagagggcttcgtagtcggggaaaggg
cttgcagtagtataggccgtaacttatctgttgcaacctcaaccgcacgaatcgattact
ctataactgccctcaatacagtatggttaccagtcaccttcacactgaagattaattcgc
gggtgcgttatggggactaaagactgttactaccggtactccgccttata
27|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 574 bp
gtactgcaccttgcactgctatctacaatgccgagggtcgccctagtgctttgcatgttt
ggcctctacctacgagtctacgcgggcgtttttaagcaagctacgatcatcttgatccaa
gggtacgaggccccgcagaccaatggaggtcgtgaccaccctcgtgtatgcctcgcacta
agcgagcattctggtatactgtctctctcctgtgataataacagtcggctcgatattcag
ttcacatgaaacagtatgttatataggtgggatggttataacacggaaaggtgaaaaaga
gtactgcaccttgcactgctatctacaatgccgagggtcgccctagtgct
28|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 169 bp
agctccctaaacaacacccgcgtaaaaccttcagttatggtgccgactaaccctgtggat
gtcttagcgctctcgttccgatgggtgctgatactagtaaatgagactcgagaccgagaa
cacgcaacggctacaacctggtcggttgttggggtttttataatcagtgTACGAAATAGT
AGACCTCGCCCTGTAGTTGGAAATTACCGTCATTGGTCTATACAAAGCGCGTGCTTCAGG
ACTCGGTGCCGAATCACTCCGTGCCAAATTAACGAAAGCTTCTGATGTGGATATGAGTTC
agctccctaaacaacacccgcgtaaaaccttcagttatggtgccgactaa
29|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 408 bp
tgcagtgatgcatcgataagaccgcatagttacctccttacaggtgacgctaggctaatt
gggagtgctggcacttgtgccctacagtcaagcgctcacgcggtgttctcctcccgcaat
cttagatattaggctctgtaccgcacgaaggatgaattttcttgactattggtccctgtt
tacgagggcttacctagagtgaggatgaacataaacaaggcctacttgacttaaggcttc
caaatcacttgagggcaaatgactcctcaaacgcgagtgccagtactatccgtgagggaa
tgcagtgatgcatcgataagaccgcatagttacctccttacaggtgacgc
30|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 52 bp
caaagcgattcgggttaacgcacttaagagttcgacgtaggttagtcccctcTGAAGCTG
GCCATAACAGGCAAGTTATTGATTGGACCTTACGACGTACATGGCATTGCGAGACACGGA
AGCTTGAGCACTCATAGATCTGCCAGTCCCAACGCAAGTTGTTTTTCCCGCATCATTCAG
AAGAAGAAGACGGATTGAACCAAACTGTCATATCAGTGGTTAGCTGCTGTAAATTCAGCG
GATTGAGGGATGGGTTTTTCGGAACATTATCTTAGCGGGAACAATAAATGCGAAAACCAG
caaagcgattcgggttaacgcacttaagagttcgacgtaggttagtcccc
31|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 581 bp
tgctctgacgtgtaagcgccttcgataacgtctttgcagcgccccacaaagtaaggaccg
gtctaacagggcttccgaatcaatagactgatagtaatgggatcctgaggctgggacccg
acacacggcatattttactagaaacgctgatttaaactccaattatccttgacgcactga
gccacagtcttagacgcagaatgtccgcaggagccctgtctttcccctaaatcattcgcg
gcatttgtttacgggttaagtcctgcggatcctagagtctgggccccgtacaaccaggaa
tgctctgacgtgtaagcgccttcgataacgtctttgcagcgccccacaaa
32|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp
gcggaactacctctctaagaccgcacaacaagtgtagtagatgaagatcacgcagagtgc
tcggcactgcatttttatacgtcgaatcagaaacgaggttcctcctctaggcttgttaaa
aatccgggcgcgatgggctggtaatctgtggccatgggagcctcgccatttaaagatttt
ggttaaggctcctctgttgtgtccatcacccttgaacgagcccgtacaaaccgtgtacga
tgttgacacTGCAGAGAGGGCGTTTCGTACCATAACATGCATCCTAGGCGGTCATATTGT
gcggaactacctctctaagaccgcacaacaagtgtagtagatgaagatca
33|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 297 bp
gccctcgcgccagcttacttttagaaaacatcgaccggtaagagatacctgggtgagctg
ggcttcacgacatgttcttaaatcaatactctaaatctgctttgtagcatgcctcaagta
aaaaaatgtgctggttccgcacaggtgtgacgattaacgttgcgcccgtttgcgtcagtc
cagatcaccgatcttccacaccaccggtgggctgccggactgcaggtaatgactcctggc
tgcattctctgacataaaggttgaatagaacggcgtccttgagaaggttatggaacgAAA
gccctcgcgccagcttacttttagaaaacatcgaccggtaagagatacct
34|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 573 bp
gcctaggggtcttgaccacagggagtacgagcattgatcattggagcaggtggctaatat
tgatagtggttagaccaccggcgcatcatcgtacgagcgcgggcgatacgtgtctttcac
cggcgcactaatcttatcttacttctcaagccccgacagcatgtacgccaagtgttgttc
tgatgaaactttcgaaatagcaactgttagtcagttatagttggggagggcagtgaatac
ctcaaatacacccaagaaataacttcgaagcggcgcctatatcacacccctgtttcttat
gcctaggggtcttgaccacagggagtacgagcattgatcattggagcagg
35|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 559 bp
gaaaaagtcgccccattcagttacaatcgtcttcagaagccagctcggttggggctatct
gcggggtaatgcaacagggggctaccagacggtaaaccagggtcttgctattggtgttac
gaaacaaaggagctatgcgacctcattagatcgagattactctcacaggcagctccggcc
atagcacaactaatttcgggtgtggagctcaccacaggaacatcttgtgcgtcctttgtt
atttaattgtgcattgtaatgcaccggaccccgggaacatacagccattatctgtgttgc
gaaaaagtcgccccattcagttacaatcgtcttcagaagccagctcggtt
36|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 187 bp
ctaagtccttatctatgatgcatctttcgttactgcgacaatatccgagacgagcagagt
tacacgccgaggtgtaaacgaatacgattgctatatgcaacgagttggttacacgcgtga
aggcgaatgtggatgctgcacttggagtcccattttaccggccgcacgtgctagctcact
caccttgCAGATATACACAGCTCGGGGCTTATGTGAGGGCTTTCTGTAGATCGGGAGCTT
ACCAAAATATTATGGCAAGGACTCACACTTTGATATACGCTTCACAATACTAAGTCCAGT
ctaagtccttatctatgatgcatctttcgttactgcgacaatatccgaga
37|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 549 bp
ttcatatggggatttggaatcgggtttgtgcggaatatgcccacgagactgcttatgtca
acgagacgacccattgtcacgttgtaaggccaccaataacacacaggtcttcgtttgctg
tctcagggcaatcgcatcgacaacatcgtatggataccgttttttatcagcttacggcgc
atcatactaataaggtgtttgagagggcgcagactcgaagcagtgtgatcttcccggttc
gaagatgcaaaaacggtcctatttcgatccaaaactcagcgcactagtccaatgcttttt
ttcatatggggatttggaatcgggtttgtgcggaatatgcccacgagact
38|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 916 bp
gtggcctaccataaatcaatttgggttaacgctctttgatctacgcactatgttgattca
cttaccccttgtcaccgggcagaagagagccagtttaggtgtggttgtatttgccaaacc
gcaaaccgcctaatgagctggatccggccatggaattaatcccgtcgtttgactcgaggt
gttcaaagactgtgcaacacgacgtgcattcatcactagaacttaatctagaccaggcct
tgtggccaggagaggcgacgtgatattgccctatacacagataattatatacccctcgcg
gtggcctaccataaatcaatttgggttaacgctctttgatctacgcacta
39|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 848 bp
accccggtcgctttggccggtcgtagccctaatcaattctgttcgtatcactaaagtaac
ggtttgaaatcctttgcaaacttgatctgggtatatgaaccggtatgcggggatagtggt
aaataagtagtttacgagctgagcgtggattatcccagagaagttgccttaggtccagag
cccgcacctacaatcactcgaggccggtcgagcgttgcgtggcaaggaaacccagccggt
caccctaccctcaaactcacgtcattgatccaatcatacatggcgtctctcacggtggtg
accccggtcgctttggccggtcgtagccctaatcaattctgttcgtatca
40|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 289 bp
agagcaaaagaaagtctgctccgcgtgacacacttgctcgttgtagtaactgcacgcgcc
gtctactcgacagggaccccccgtcggttcctctctatagcaatcgcggaagtggttccc
tgcctcccgcgcagaagttcaaactagtaatccttaatgacttgtggggggggagatcag
tttcttccacaatggagtaaacttatgcgagaatcaagatcgcagaggccattttttgat
gatactgtcagatatgtggttagccgtatcacgttaccgacgcagaattTGGTGAAATAG
agagcaaaagaaagtctgctccgcgtgacacacttgctcgttgtagtaac
41|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 642 bp
ctggggaatgaccgtaccgatctaattccccgtcgaaaaacttatgacgcgcagttgtcc
ttatgcttgagacatgaatccttgccccatattggcgatcttggccaatgagatctgtcg
aaagtactggaggccggtaaattgggggctctagaggtccgcccctgaaggactaacgtg
tgtgtgtgtctacgtgtcgggttatcagcgtgttggacgatggccgtggattcaacgcat
gctagagagctaatgatcctccgaagtcaaaagcctcagtgcttcgatttatgagcgcgt
ctggggaatgaccgtaccgatctaattccccgtcgaaaaacttatgacgc
42|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 993 bp
gcaagaagccaaaaaccttgcaggaggtcatttaagtttacccgcgcataagcagagacg
gacctctctgagatctcgcaccgcgcgcccggccggcactatcgatgctagactagggtt
ggtgactagcccgtcaaaaccagcctaaacgcaaagattgtaggcgctagtccggaactg
actgcttcgtgtcggtgggagcctagtatgtttccgggtctatgacccctaaaatcatag
acgtgtcttaatagctatacctgacttactttgaagtacttgccacgacgagtttatgag
gcaagaagccaaaaaccttgcaggaggtcatttaagtttacccgcgcata
43|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 473 bp
ggttgtccaggcgcgagcaagtagctgactcgctaatcttaacgagtattgcttaggact
tccaaatactccaagacgtcaatacgctttatctttgtgaagtcatcccggaccgagcgc
ttgggtcgtgatttaaaatcccctgtgatgtggctacaggtgcggcctatacagccgaga
agaaggccgtctttaggcgtccaatgaaccgttacagggacacaccaaactgcgccaact
gatcccacgggtcacggtacgctctaagaccagtcgggattctgacttaacatcgcagca
ggttgtccaggcgcgagcaagtagctgactcgctaatcttaacgagtatt
44|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 272 bp
gtagaacttgttccccatggacaatgctagttccgttaatgccaggtattcatgtgccaa
gcgcctgcctggggaatacgagcctctctacaaacttacggccaccatgcttaaagattc
ggtgacttcactaatgacctatacaagtaatgcggaggacgctgtcgcttattgctcttt
gctaaggccagttatgtccgtcagtcaacgatacgctgcggcggtgggtgacggcactag
accggaagcctgatgacaagttcgaatcaataGTCGCTTCCATTACACTTGCGCTATTCC
gtagaacttgttccccatggacaatgctagttccgttaatgccaggtatt
45|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 860 bp
aaaagcatcactctaacgacgctaccgtctgaatagatcaagattgctatcggttcgacc
ttgatcgcatgtgaacccgcccaaaaacccgtctcgacaaaagttacgtcgcatgggctg
cgccaccggatagctcctagcttatcttataaatcaggtagagctacaacatggtgctat
gacaactggagtgtcatcgctttggcgaaaccgtaaagggtgggaattgctgcattctca
actgggccgaactattccgcattcggctgctcacaaatcgtggaatgtgtccttgaacgt
aaaagcatcactctaacgacgctaccgtctgaatagatcaagattgctat
46|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 884 bp
aagcctctacaggctctgcggtttggctttacttaacggtgagtcaggaaaacattactg
ctacgttcaccgtgttcagagatagagagtacattagggaccaatcacaacgttcgccag
ggcaccgcctaatccgcgttgttagcaagagtacaggctctcgtatactttcagaccctt
caatactagacgacaaattgcagcccggggtcatcggtcgactcagatacgtgctaacga
gtaccaggtctaccgttgcaacgttggatgcgttatactcggcataaggcgatgcccttt
aagcctctacaggctctgcggtttggctttacttaacggtgagtcaggaa
47|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 888 bp
gcgccttgaagaggcgaggtctaaaggcaaaaatttagatccgccctatgagacggccga
cgcggagaattccctaaccactattgtcctctgcatcgatatcaggaataggcttacctg
caatctcttatggtgatagactgtttgggagctgaacctgagacgcgcacgaaatttgga
aggatcaaataggccccgcagtctctggtagacttctgccgagcggactagcttggctaa
ggtgtacaagcctaaatcgtttttcacatcaattttatagctgattatagaggaacgacg
gcgccttgaagaggcgaggtctaaaggcaaaaatttagatccgccctatg
48|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 588 bp
catcaagatgggttacgtaggaccgagattcagtctctgggttagagccgacagcggggc
cgctacatagtacacggcgaggaatgcggggttgggctgaaccgtacacagtgggctagc
tgcggtacctgccaccggcatgcgtttaaatcctttcctttggcgaagccaactgccgac
gtccgcaacagagactcgttttccgaccccgttactaaatcagctaactggcgcctgaat
cctcttacgtcggatgttaattagtgtatagaatatcggagggttgagtgcgacgcgctt
catcaagatgggttacgtaggaccgagattcagtctctgggttagagccg
49|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 626 bp
taacctcagtctcgttcccccctcggtagttcggacccttattcgcttatctcacattca
tcactgtagaccaaggaccgggcatacttgcggatatctaccaggactaggcacttaggg
atacgctgttgaatacgggtttcgtcccgtgtactcaagtgtagtttaagataggtacga
gtgctagtacatcgtacaatttacaactgacttaaacgagagtttattatgtcttgttca
cttgttgacacgcctgggaaaataataaaaggcaacgtctaatctcagacccgttgatta
taacctcagtctcgttcccccctcggtagttcggacccttattcgcttat
50|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 214 bp
taactgtcggtcactgctcatcccgactagttcggctcactagacttactcgcggaagcg
agaagtaggacgtcgtgtaatactccaacgtcgttacgcaatgttgtaaaacttcatcgc
attccgtgcatggcctaaacgtgcagcattatataacgctctttggtcttaatatccatc
gcgggagtaacgcgaaggggagacgtgtgcctgaACGAACGCTAAACTAGGTACTAAGTC
GTGAAGCTCGGGTGGAGACAGGTAAACTGATCGCAACGTATCAACCAATTCTGGACCCTA
taactgtcggtcactgctcatcccgactagttcggctcactagacttact
import logging
LOG = logging.getLogger(__name__)


def generate_sequences(n, mean, sd):
    """
    Generates random nucleotide sequences.

    The length of every sequence is drawn from a gaussian distribution. The
    draw is rounded to the nearest integer and its absolute value is used, so
    a negative draw never produces a negative length.

    Args:
        n (int): Amount of sequences to generate.
        mean (int): mean length of sequence (gaussian distribution).
        sd (float): standard deviation of length of sequence (gaussian distribution).

    Returns:
        dict: maps each index in ``range(n)`` to its generated sequence, a
            string made up of the characters A, T, C and G.
    """
    from random import choice, gauss

    LOG.info("Generating sequences.")
    nucleotides = ["A", "T", "C", "G"]
    sequences = {}
    for index in range(n):
        # One gauss() draw per sequence, followed by one choice() per base:
        # same order of random calls as before, so seeded runs are unchanged.
        length = abs(round(gauss(mean, sd)))
        # str.join keeps construction linear instead of repeated concatenation.
        sequences[index] = "".join(choice(nucleotides) for _ in range(length))
    return sequences
def read_in_fasta(file_path):
'''
"""
This function reads in FASTA files.
Args:
......@@ -36,7 +11,7 @@ def read_in_fasta(file_path):
Returns:
Dict: It returns a dictionary with sequences.
'''
"""
LOG.info("Reading in FASTA files from destination.")
sequences = {}
f = open(file_path)
......@@ -52,7 +27,7 @@ def read_in_fasta(file_path):
return sequences
def read_sequence(seq, read_length):
'''
"""
This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is
shorter than the requested length, or cuts the sequence if it is longer.
......@@ -63,12 +38,11 @@ def read_sequence(seq, read_length):
Returns:
str: returns sequenced element
'''
"""
from random import choice
bases = ["A", "T", "C", "G"]
sequenced = ''
if read_length >= len(seq):
if read_length > len(seq):
for nt in range(len(seq)):
sequenced += seq[nt]
for nt in range(len(seq), read_length):
......@@ -94,10 +68,10 @@ def simulate_sequencing(sequences, read_length):
results = {}
for index, key in enumerate(sequences):
results[key] = read_sequence(sequences[key], read_length=read_length)
LOG.info("Sequencing was successfully executed.")
return results
import random
def generate_sequences(n, mean, sd):
"""
Generates random sequences.
......@@ -110,16 +84,17 @@ def generate_sequences(n, mean, sd):
Returns:
dict: of n sequences
"""
from random import choice, gauss
LOG.info("Generating random sequences.")
dict1 = {}
dict = {}
for i in range(n):
keys = range(n)
seq = ""
nt = ["A", "T", "C", "G"]
for value in range(round(random.gauss(mean, sd))):
seq = seq + random.choice(nt)
dict1[keys[i]] = seq
return dict1
for value in range(abs(round(gauss(mean, sd)))):
seq = seq + choice(nt)
key = str(i) + ': length ' + str(len(seq)) + ' nt'
dict[key] = seq
return dict
def write_fasta(sequences, file_path):
"""
......@@ -138,8 +113,8 @@ def write_fasta(sequences, file_path):
outfile.write(key + "\n")
outfile.write("\n".join(wrap(value, 60)))
outfile.write("\n")
LOG.info("Sequencing was successfully executed.")
class read_sequencer:
class ReadSequencer:
def __init__(self):
self.sequences = {}
self.reads = {}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment