Skip to content
Snippets Groups Projects
Commit c60004ba authored by Christoph Harmel's avatar Christoph Harmel
Browse files

Merge branch 'fix_logging' into 'main'

feat: added random sequence generator to cli

See merge request !21
parents d712ba97 5001fde3
No related branches found
No related tags found
1 merge request: !21 feat: added random sequence generator to cli
import argparse import argparse
from modules import read_sequencer as rs from modules import ReadSequencer
import logging import logging
parser = argparse.ArgumentParser(prog='read_sequencer', parser = argparse.ArgumentParser(prog='read_sequencer',
...@@ -11,21 +11,28 @@ parser.add_argument('--output_file_path', ...@@ -11,21 +11,28 @@ parser.add_argument('--output_file_path',
parser.add_argument('--read_length', parser.add_argument('--read_length',
help='read length for sequencing', help='read length for sequencing',
type=int) type=int)
parser.add_argument('--random', action='store_true', default=False,
help='generate random sequences')
parser.add_argument('--n_random', default=100, type=int, help='n random sequences')
parser.add_argument('--mean_random', default=50, type=int, help='mean random sequences')
parser.add_argument('--sd_random', default=25, type=int, help='standard deviation random sequences')
args = parser.parse_args() args = parser.parse_args()
def main(): def main():
LOG.info("Program started.") LOG.info("Read sequencer started.")
read_sequencer = rs() read_sequencer = ReadSequencer()
read_sequencer.read_fasta(args.input_file_path) if args.random:
read_sequencer.add_random_sequences(n=args.n_random, mean=args.mean_random, sd=args.sd_random)
else:
read_sequencer.read_fasta(args.input_file_path)
read_sequencer.run_sequencing(args.read_length) read_sequencer.run_sequencing(args.read_length)
read_sequencer.write_fasta(args.output_file_path) read_sequencer.write_fasta(args.output_file_path)
LOG.info("Program finished.") LOG.info("Read sequencer finished.")
if __name__ == '__main__': if __name__ == '__main__':
logging.basicConfig( logging.basicConfig(
format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")', format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")',
level=logging.INFO, level=logging.INFO)
)
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
main() main()
1|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 481 bp 1|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 481 bp
tgagcactcggtgccaagggcggggatacacagatggttggctgatacaaccgggactta tgagcactcggtgccaagggcggggatacacagatggttggctgatacaa
aattccctagactagatctgtgttggaacgcctctctacgagaaggcgaacgaactggcg
ccgaggcgatcgctaacatcttcgtctcgcttgaaccacacaatggatgattcctcccta
ggggtttgacaatcaacctggatagcgtttaatatagatggctggttgatttgtaaggcc
ttcacagactactcagagcaataagtgaccccccaacaatcagaggctgatcctctgctc
2|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 495 bp 2|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 495 bp
ctgaatcaggtgtaggttctttttacgtcgtttaaggagctacacggtatcttgttttca ctgaatcaggtgtaggttctttttacgtcgtttaaggagctacacggtat
gttaaggtgccacacccccgggtggatcatccgtcagctttcctacaattaggtaactgg
cgggatcatttagtcttgtattaagacgctcgcgcccggggcggccggcttgtttgtgga
gagaaacaacaagtctgagtatagattaaatacaactggtttactggcaagtcagcgcgt
aacaaccggtgagccgctgcgcatgcttactgcaatgaacatcttggcacgatcctgcga
3|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 193 bp 3|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 193 bp
acttcagtactggaaggatctaggaaccattaatgcgagtgtggtgacgccagacgaccc acttcagtactggaaggatctaggaaccattaatgcgagtgtggtgacgc
ccggtgttctgccaccttctttggataggagaaccgtcactcgccccggaggccccacgg
ataagaagggtatcttgtgatcacgcgaatgactcacttgcgtaagtaatctaactttgt
ttttcgctataaaAAGCCGGGGTGTTGAAGTCTATCGTCGATGGCTGCAGTATTAAAGAT
CTTATGACTAATGCTCCAGCTTCCCCTCTGTGGTTAGAGACGAACTCGTCAGCCTCGATT
4|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 625 bp 4|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 625 bp
acgtctggagcgtgggttgacccctgtacatggttctttccggatccttaacgtgccgat acgtctggagcgtgggttgacccctgtacatggttctttccggatcctta
acaactcaaaggtaactgtgcttaccacttccgaagctacatgcctctaacaaagtactt
tcgaggaggcactcaacccccggagatgctttgcgcggaagcagagatcgctgctcaaaa
tttggaatcactttcgtgcgagacccaaacaatttatggtggattcaagcgaacgagtca
tgattacagatctatcaatcgaggagaggacggcttcgccgtttccttttaatgtgaaac
5|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 845 bp 5|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 845 bp
agagcgtacggcgcgcatcgtataccctacgagggcggcgtgtggaggaacgctgggctg agagcgtacggcgcgcatcgtataccctacgagggcggcgtgtggaggaa
acactgtagaagattagatacacttgtccctaaaattaacccttaaccgctattagccgt
gaacgcttcctaatatttcaagccgtatagctaagtggagaatgtggagccctggtcaaa
tcacgagccaattagccctagacggacagcacatctcgtcgcgttaagcggaacactcag
cttttattacctagtgctcagcctggtttccatatgctctaaccgaactgatgcatactt
6|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 703 bp 6|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 703 bp
tgcagtcgatgtgctattcgttttaggcagtctacgcgcttagtaactcccacggccata tgcagtcgatgtgctattcgttttaggcagtctacgcgcttagtaactcc
gacttatctcagacatggaccatgtcgatatcggacgccgtcttaccacatttttcatag
cccttcataaggcagcgtgctcttactgcccaataaggtggacgattccgaccctaggcg
aaccagcgctatagatggaccttctaattgatgcgcaacgtgattgtttccttggtctgg
gttagcatttcggtagcctaacagtcactccagttcgctaactggcctggatgagggccc
7|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 243 bp 7|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 243 bp
actctttagaatgggtttcactaatagtacgtgcatacaatttcgtcagaaagggcgctt actctttagaatgggtttcactaatagtacgtgcatacaatttcgtcaga
gctaagggacacggatcaatgatgaccagacttatggtgtcaggtctcactatattacat
atccggaacccgtgcccgcaccacgcgctgggtctaggcgaccggtgcatcatctccgcg
tctctagaggattctctcggtaaatgctgaattgcgtgagatcaaatccgtatgccagtc
atgAGGCAAGGCGTATAGATCTTTCCTCAGCATGAACCGGAACATCCTACAATCGCACGC
8|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 863 bp 8|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 863 bp
attggcccggtccaggacagagccttatattgctactggtatgagaaccgttctgacgta attggcccggtccaggacagagccttatattgctactggtatgagaaccg
aacttgatggctttacgcctgcacgggcttcatacacacatgaccgtggacaaagtcgcc
caggccctcgaatagggtgtaatggttaacggttagtgccaccccaatgggtgcgaggca
gtaagagtgtcctatggcaaaactctcctcgtttcagaagggtcgctcctctagcctcct
tatcccccctataatagtactcgccgggtacgagccggagctccctcgagaagtcatcct
9|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 494 bp 9|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 494 bp
aagcgaaactcctagaacttcccatcaggcaatcgtgtcccacgaagcacggatactacg aagcgaaactcctagaacttcccatcaggcaatcgtgtcccacgaagcac
ggcactagttgaatggggggtttttttcgtaggtcgtaataggtactcggatagtcggcc
cagagttatgcttaagaatgcgctgcttaattcaatgtgactgccgttgtctccgatcag
atccaggtgatgattgcgatcgcagcgacatatgtctcgaaagacgtgtcgtgaataagc
ctgtaagcccaatgcaacatggttccctcaccttgtagctgatgtaccgtgtttcaatct
10|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 86 bp 10|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 86 bp
atcctagcgccaaagatttactgttatggggtcgacgaacactagccgataatgccgtcc atcctagcgccaaagatttactgttatggggtcgacgaacactagccgat
tgggatctctagcctagtattatgcgGACGTTTCCAGCCCCGCTGACCCTGAGTCGGACG
TGAAAGCGAATCATCATATAAAGCATACACTGACTTGCACAAGTTGAAATAAGAGGTTCG
CTTAGGCTTGCCTCTCTAGTGGCGCAGCAGTACTGAGTGGGTTCTACTTACTCTCTGGAC
TCATGATCGTGTACACCGTGAAAGACGGCCCATGTGCCGTATATCTACCGTGCATAACCT
11|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 360 bp 11|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 360 bp
cgcctgagggtcctaaatctgacgtatgatcgaagagattggaaggtcccggcgggtcac cgcctgagggtcctaaatctgacgtatgatcgaagagattggaaggtccc
cccacgttgcgatcatggccaaggccatggtttgctcaaaaatcccacattcgccgtctt
acgcgttaggacctcactatcccacagacggtgcgttaccttgtagttgacgcgggatcg
tggtgataacagctatttccgagacttcatattcttttacatagcggcttaccgtagtga
ctccatacattatttgcctattttgtagtgccccgaacagtaaggggaagccaactgccg
12|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 140 bp 12|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 140 bp
gaattcctggggatttactcacccccgaggcggacaagatttccagctggatcaccgagg gaattcctggggatttactcacccccgaggcggacaagatttccagctgg
gttacttaatcccttcgatgctttcaaaggccctaatcagtattgagcaacgaaagcgga
gtcgttagtgtccaagttgcAAATGGTATCGCAGAGCGGTCGGATTCGCCAAAAAGCTTG
GCCAATGGCTAAAATTTAGCTCTCGCCCTCGCGGACATTGTGGACATGTGCCTGACTACC
CATCATTCTCGCTCGGCACTAGGCCACCAGGGTTAACGATTTCGTAACAACGACCCGCAT
13|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 832 bp 13|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 832 bp
aatactctcgttgaagcgtcggacagtaaagtgagagatttcggcccacggtagtcggac aatactctcgttgaagcgtcggacagtaaagtgagagatttcggcccacg
attctcagtggggagcgaagagttgcgcttagagccgacgtacacgatataacctcaatt
gaaaatcgctatgtgcatcgttagggcctccggcgtgctgtttcggcagctgagtgtgag
ggtataacttaccttcgacccgaattgtctcgcggaaatcctaggcaagtaatccacttt
tggtacgggggagctagttcctctaagacgaacaagtgcactcttcacgtatagtgccct
14|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 296 bp 14|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 296 bp
atcggggtgcgaaatcccctgagctggttgactacatacgtaaccacgttccgtgcgtca atcggggtgcgaaatcccctgagctggttgactacatacgtaaccacgtt
tctaagcgtatcggctcatactggtggtaactagacttggtgaaccctaggtgccggcat
atcgaggtccgcatccaaaataactatcgctatagctacatagacatttactcgcaatat
tacacgaaccgtacgtccctcggtattaacgtaatggttaaagtctctaattccgctgca
gagcggcgggataaagacgccggtgtggcctgaatggtggatctgtccgtagtaccACGT
15|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 515 bp 15|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 515 bp
accttcaatttgttcgcccgggacaagtagaaattactgtaaactaaacttaacctattc accttcaatttgttcgcccgggacaagtagaaattactgtaaactaaact
cttgttaaagtccgcaccaagtgtactgtaagaatggtcgctcgtaataataacgagaag
atcctcgagccgtggtctgctgcaactaccttgagcggtacatcgatgtcccactctggg
cggggatcaggggcgagacttgtggtgaggccaaagaatggcgcatatgtaggcaccata
cgtcgatacgttccaggagtagaggcctcgaacatacaccacgataagtctacagacgca
16|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 820 bp 16|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 820 bp
ccggctcaatcctgtagaaccgcgtacaacacacccaagctataccgcacacggcgcctt ccggctcaatcctgtagaaccgcgtacaacacacccaagctataccgcac
agcaaccactgcttatctgcgtattatacctttacaatcattacatttgatctatctgtg
taccggttttttttgattcaattcgctggattacgacctcccggccaaaaattctcaatt
catcgttaacagacgtatttgaagataatcattcaacgtgaactagcacttggtcacttg
gtacgccaaccaagctgtgctttggggcaaccctttataactcacatgccgtcctaggac
17|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 791 bp 17|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 791 bp
attgttagggcctgtccggaaaagatcaacggaagatattcaccagcacctatgctgact attgttagggcctgtccggaaaagatcaacggaagatattcaccagcacc
cacgtagttcccgacgttcagtcccctccaacgtggaaggtaggacccatctccttaacg
ggatcgatcggtcttcctgtgaaagttgctcagagtcctcaaggacgtttttgggtgcgt
gtacggtatggttatggtacgtgtctgtgacagagggtattcttactggttaagtgaccc
atatgaccacctgacgcccgagcatagacctgtaggggtcgacgcgagagatggcagctt
18|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 328 bp 18|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 328 bp
accgattacaggcagtcggccttgtccgctcgtatatccagggatgttccaccgaaagtg accgattacaggcagtcggccttgtccgctcgtatatccagggatgttcc
ggagtgtggcacttattggtaaaaggcatttttacgaacgacactgataggattgatcac
tcaagaaatgttctcgaccctgaggtaggagtcttaacagacggacatcctccgtagata
cgtgagaattaagggacgcatgtcgaaaacgcttggaatctactgtagtggcccacctta
cgcttcttccaataactcccttcatagtccggcaacctcggtgggggtttcccttaggcc
19|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp 19|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp
ggagtggaaaattctgtagtccgttggcggcgaccgcaaaccagaataatatggtcacgt ggagtggaaaattctgtagtccgttggcggcgaccgcaaaccagaataat
taggccctcgggccccttcatatgtacggagtcattgaattagcattatactaccgttac
gcaagaccctatcccatccgcgactgtcaccactgctgtaaggttgcaaggctgtttcaa
tgtaaagtaggcgaattctgacgtgggctgataacgaatcccccgggttatctagtgcaa
gtgctatccCAGCAGTAGCTGTTGACTCGGAGGCGTCAGAACTTCCCTCTAGTGTGGCAA
20|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 440 bp 20|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 440 bp
atcttaaacagcccaatcggctcgccgaccaatttcccgcttcacagtacgcggaagaat atcttaaacagcccaatcggctcgccgaccaatttcccgcttcacagtac
ctgcagatagaagtcagccctctcacgtcaataggaatgctgcccgtcatgtttaactac
tcaagttttaaggtgtcccttatcggttccaggatcatgtctgaaggaagatggtcgcaa
cgaaatctggagtggcatacatcgttcggtcgaagcataatctcagacgttatctataaa
gttagggcgctgtatggattgggattcaagctcgaagcctgttcctgccatacagcgcct
21|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 840 bp 21|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 840 bp
cataactcgtgagtggccctgtacaagtcattgcatcacaatccttgcaatttgctcctt cataactcgtgagtggccctgtacaagtcattgcatcacaatccttgcaa
tggccaagcgtacaagaccccggacccatacgctcccggctgataaactgctacagcatg
gtatatccggatgatgcccctgaaaactgcggaagtcaatttgttgatgaatccccgact
ttccgctgttcctgtggatggtcgaatgccaaatgaagagctgctccccccttctttaat
atcaagcactacaaagataaagcctgtttggctgacggcgagccctcccctatcgtacgc
22|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 234 bp 22|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 234 bp
caccgcgaaagtgactcagttttcccggtcttatcacggtcgttgtcgtccagattccgg caccgcgaaagtgactcagttttcccggtcttatcacggtcgttgtcgtc
ttgttaactgcgggagctataacacttattccttactgcgacggctgatccactaagaac
agttcatagagctcggctatataatttgaagacatagattccacggtacttgtagcccat
aaccgctgaggaggaacgtccaacggttcgcgcggagcatgtgacgcttaaaggGATGTA
AACAAAAGTTTCCAGGCGGCGGCGGTGTAGGCCGTTGCACTGTACTAGGCACAGCTACTG
23|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 917 bp 23|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 917 bp
atcaagtgattacctggtaacccgccgctcttgcagtgttcaccctttgtgtcgtcttag atcaagtgattacctggtaacccgccgctcttgcagtgttcaccctttgt
tgtttgtacacgttaaggaaaagcgttagcttaaccattacgccccccaaagcccggtgt
gtagttatctacatgccgtgtcaaagcggtgactaaatgtttatcaagttctgatgacaa
cgtgagctcttaaagccattgactagtataagcacggaacaatgataccaggcaagcttg
aatataggataaggcctctaagctcgaagcggatcttacggaggtgtgaatcaacagcac
24|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 676 bp 24|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 676 bp
cacacggcatcgcaaagcgagctatccagagatgatacatgtggttgaaggtgattgcgt cacacggcatcgcaaagcgagctatccagagatgatacatgtggttgaag
caacatgggggttgctcagtttggttggtcaatcaacggtggcagaccatgcgataacga
tgatggtaagactgtaaggtaagttaaatactctcgtctgccagttgggtcgtcaacgct
gcagagacgccattcttcccagaaggtccgagctttctacagtgccgcggcgtcatgacc
aaaggggtccaacctcgcagtaaaatgtctatgcttctggtttggaatgagaccgggcca
25|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 870 bp 25|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 870 bp
ctgatcaccaatagcttgcgcttaacacacgcgccttacaattatatgacgcccttgcca ctgatcaccaatagcttgcgcttaacacacgcgccttacaattatatgac
atgacagatagagccattaatcgtggaaaccaggcatttatacttgtccgatgtatcgat
tctcctctatctacagagcccggacatgcgaaatatcaaaattccatgtatactgaataa
atacattgggcaagccgggctcatgcagcaatcccagcgttgccttacgcaaagatatct
tacggagttgcctttagattaacagcacgtgttcaaaaacctagccaactctgtcggtct
26|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 751 bp 26|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 751 bp
gggtgcgttatggggactaaagactgttactaccggtactccgccttatagagccgtcac gggtgcgttatggggactaaagactgttactaccggtactccgccttata
gtattaatcagctatcaacagatactatcgtcacagccctccttctggcgaaggatctga
gcatttgcaaagctataagttggtacgcaacggtagagggcttcgtagtcggggaaaggg
cttgcagtagtataggccgtaacttatctgttgcaacctcaaccgcacgaatcgattact
ctataactgccctcaatacagtatggttaccagtcaccttcacactgaagattaattcgc
27|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 574 bp 27|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 574 bp
gtactgcaccttgcactgctatctacaatgccgagggtcgccctagtgctttgcatgttt gtactgcaccttgcactgctatctacaatgccgagggtcgccctagtgct
ggcctctacctacgagtctacgcgggcgtttttaagcaagctacgatcatcttgatccaa
gggtacgaggccccgcagaccaatggaggtcgtgaccaccctcgtgtatgcctcgcacta
agcgagcattctggtatactgtctctctcctgtgataataacagtcggctcgatattcag
ttcacatgaaacagtatgttatataggtgggatggttataacacggaaaggtgaaaaaga
28|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 169 bp 28|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 169 bp
agctccctaaacaacacccgcgtaaaaccttcagttatggtgccgactaaccctgtggat agctccctaaacaacacccgcgtaaaaccttcagttatggtgccgactaa
gtcttagcgctctcgttccgatgggtgctgatactagtaaatgagactcgagaccgagaa
cacgcaacggctacaacctggtcggttgttggggtttttataatcagtgTACGAAATAGT
AGACCTCGCCCTGTAGTTGGAAATTACCGTCATTGGTCTATACAAAGCGCGTGCTTCAGG
ACTCGGTGCCGAATCACTCCGTGCCAAATTAACGAAAGCTTCTGATGTGGATATGAGTTC
29|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 408 bp 29|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 408 bp
tgcagtgatgcatcgataagaccgcatagttacctccttacaggtgacgctaggctaatt tgcagtgatgcatcgataagaccgcatagttacctccttacaggtgacgc
gggagtgctggcacttgtgccctacagtcaagcgctcacgcggtgttctcctcccgcaat
cttagatattaggctctgtaccgcacgaaggatgaattttcttgactattggtccctgtt
tacgagggcttacctagagtgaggatgaacataaacaaggcctacttgacttaaggcttc
caaatcacttgagggcaaatgactcctcaaacgcgagtgccagtactatccgtgagggaa
30|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 52 bp 30|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 52 bp
caaagcgattcgggttaacgcacttaagagttcgacgtaggttagtcccctcTGAAGCTG caaagcgattcgggttaacgcacttaagagttcgacgtaggttagtcccc
GCCATAACAGGCAAGTTATTGATTGGACCTTACGACGTACATGGCATTGCGAGACACGGA
AGCTTGAGCACTCATAGATCTGCCAGTCCCAACGCAAGTTGTTTTTCCCGCATCATTCAG
AAGAAGAAGACGGATTGAACCAAACTGTCATATCAGTGGTTAGCTGCTGTAAATTCAGCG
GATTGAGGGATGGGTTTTTCGGAACATTATCTTAGCGGGAACAATAAATGCGAAAACCAG
31|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 581 bp 31|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 581 bp
tgctctgacgtgtaagcgccttcgataacgtctttgcagcgccccacaaagtaaggaccg tgctctgacgtgtaagcgccttcgataacgtctttgcagcgccccacaaa
gtctaacagggcttccgaatcaatagactgatagtaatgggatcctgaggctgggacccg
acacacggcatattttactagaaacgctgatttaaactccaattatccttgacgcactga
gccacagtcttagacgcagaatgtccgcaggagccctgtctttcccctaaatcattcgcg
gcatttgtttacgggttaagtcctgcggatcctagagtctgggccccgtacaaccaggaa
32|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp 32|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp
gcggaactacctctctaagaccgcacaacaagtgtagtagatgaagatcacgcagagtgc gcggaactacctctctaagaccgcacaacaagtgtagtagatgaagatca
tcggcactgcatttttatacgtcgaatcagaaacgaggttcctcctctaggcttgttaaa
aatccgggcgcgatgggctggtaatctgtggccatgggagcctcgccatttaaagatttt
ggttaaggctcctctgttgtgtccatcacccttgaacgagcccgtacaaaccgtgtacga
tgttgacacTGCAGAGAGGGCGTTTCGTACCATAACATGCATCCTAGGCGGTCATATTGT
33|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 297 bp 33|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 297 bp
gccctcgcgccagcttacttttagaaaacatcgaccggtaagagatacctgggtgagctg gccctcgcgccagcttacttttagaaaacatcgaccggtaagagatacct
ggcttcacgacatgttcttaaatcaatactctaaatctgctttgtagcatgcctcaagta
aaaaaatgtgctggttccgcacaggtgtgacgattaacgttgcgcccgtttgcgtcagtc
cagatcaccgatcttccacaccaccggtgggctgccggactgcaggtaatgactcctggc
tgcattctctgacataaaggttgaatagaacggcgtccttgagaaggttatggaacgAAA
34|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 573 bp 34|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 573 bp
gcctaggggtcttgaccacagggagtacgagcattgatcattggagcaggtggctaatat gcctaggggtcttgaccacagggagtacgagcattgatcattggagcagg
tgatagtggttagaccaccggcgcatcatcgtacgagcgcgggcgatacgtgtctttcac
cggcgcactaatcttatcttacttctcaagccccgacagcatgtacgccaagtgttgttc
tgatgaaactttcgaaatagcaactgttagtcagttatagttggggagggcagtgaatac
ctcaaatacacccaagaaataacttcgaagcggcgcctatatcacacccctgtttcttat
35|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 559 bp 35|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 559 bp
gaaaaagtcgccccattcagttacaatcgtcttcagaagccagctcggttggggctatct gaaaaagtcgccccattcagttacaatcgtcttcagaagccagctcggtt
gcggggtaatgcaacagggggctaccagacggtaaaccagggtcttgctattggtgttac
gaaacaaaggagctatgcgacctcattagatcgagattactctcacaggcagctccggcc
atagcacaactaatttcgggtgtggagctcaccacaggaacatcttgtgcgtcctttgtt
atttaattgtgcattgtaatgcaccggaccccgggaacatacagccattatctgtgttgc
36|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 187 bp 36|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 187 bp
ctaagtccttatctatgatgcatctttcgttactgcgacaatatccgagacgagcagagt ctaagtccttatctatgatgcatctttcgttactgcgacaatatccgaga
tacacgccgaggtgtaaacgaatacgattgctatatgcaacgagttggttacacgcgtga
aggcgaatgtggatgctgcacttggagtcccattttaccggccgcacgtgctagctcact
caccttgCAGATATACACAGCTCGGGGCTTATGTGAGGGCTTTCTGTAGATCGGGAGCTT
ACCAAAATATTATGGCAAGGACTCACACTTTGATATACGCTTCACAATACTAAGTCCAGT
37|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 549 bp 37|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 549 bp
ttcatatggggatttggaatcgggtttgtgcggaatatgcccacgagactgcttatgtca ttcatatggggatttggaatcgggtttgtgcggaatatgcccacgagact
acgagacgacccattgtcacgttgtaaggccaccaataacacacaggtcttcgtttgctg
tctcagggcaatcgcatcgacaacatcgtatggataccgttttttatcagcttacggcgc
atcatactaataaggtgtttgagagggcgcagactcgaagcagtgtgatcttcccggttc
gaagatgcaaaaacggtcctatttcgatccaaaactcagcgcactagtccaatgcttttt
38|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 916 bp 38|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 916 bp
gtggcctaccataaatcaatttgggttaacgctctttgatctacgcactatgttgattca gtggcctaccataaatcaatttgggttaacgctctttgatctacgcacta
cttaccccttgtcaccgggcagaagagagccagtttaggtgtggttgtatttgccaaacc
gcaaaccgcctaatgagctggatccggccatggaattaatcccgtcgtttgactcgaggt
gttcaaagactgtgcaacacgacgtgcattcatcactagaacttaatctagaccaggcct
tgtggccaggagaggcgacgtgatattgccctatacacagataattatatacccctcgcg
39|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 848 bp 39|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 848 bp
accccggtcgctttggccggtcgtagccctaatcaattctgttcgtatcactaaagtaac accccggtcgctttggccggtcgtagccctaatcaattctgttcgtatca
ggtttgaaatcctttgcaaacttgatctgggtatatgaaccggtatgcggggatagtggt
aaataagtagtttacgagctgagcgtggattatcccagagaagttgccttaggtccagag
cccgcacctacaatcactcgaggccggtcgagcgttgcgtggcaaggaaacccagccggt
caccctaccctcaaactcacgtcattgatccaatcatacatggcgtctctcacggtggtg
40|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 289 bp 40|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 289 bp
agagcaaaagaaagtctgctccgcgtgacacacttgctcgttgtagtaactgcacgcgcc agagcaaaagaaagtctgctccgcgtgacacacttgctcgttgtagtaac
gtctactcgacagggaccccccgtcggttcctctctatagcaatcgcggaagtggttccc
tgcctcccgcgcagaagttcaaactagtaatccttaatgacttgtggggggggagatcag
tttcttccacaatggagtaaacttatgcgagaatcaagatcgcagaggccattttttgat
gatactgtcagatatgtggttagccgtatcacgttaccgacgcagaattTGGTGAAATAG
41|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 642 bp 41|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 642 bp
ctggggaatgaccgtaccgatctaattccccgtcgaaaaacttatgacgcgcagttgtcc ctggggaatgaccgtaccgatctaattccccgtcgaaaaacttatgacgc
ttatgcttgagacatgaatccttgccccatattggcgatcttggccaatgagatctgtcg
aaagtactggaggccggtaaattgggggctctagaggtccgcccctgaaggactaacgtg
tgtgtgtgtctacgtgtcgggttatcagcgtgttggacgatggccgtggattcaacgcat
gctagagagctaatgatcctccgaagtcaaaagcctcagtgcttcgatttatgagcgcgt
42|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 993 bp 42|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 993 bp
gcaagaagccaaaaaccttgcaggaggtcatttaagtttacccgcgcataagcagagacg gcaagaagccaaaaaccttgcaggaggtcatttaagtttacccgcgcata
gacctctctgagatctcgcaccgcgcgcccggccggcactatcgatgctagactagggtt
ggtgactagcccgtcaaaaccagcctaaacgcaaagattgtaggcgctagtccggaactg
actgcttcgtgtcggtgggagcctagtatgtttccgggtctatgacccctaaaatcatag
acgtgtcttaatagctatacctgacttactttgaagtacttgccacgacgagtttatgag
43|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 473 bp 43|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 473 bp
ggttgtccaggcgcgagcaagtagctgactcgctaatcttaacgagtattgcttaggact ggttgtccaggcgcgagcaagtagctgactcgctaatcttaacgagtatt
tccaaatactccaagacgtcaatacgctttatctttgtgaagtcatcccggaccgagcgc
ttgggtcgtgatttaaaatcccctgtgatgtggctacaggtgcggcctatacagccgaga
agaaggccgtctttaggcgtccaatgaaccgttacagggacacaccaaactgcgccaact
gatcccacgggtcacggtacgctctaagaccagtcgggattctgacttaacatcgcagca
44|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 272 bp 44|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 272 bp
gtagaacttgttccccatggacaatgctagttccgttaatgccaggtattcatgtgccaa gtagaacttgttccccatggacaatgctagttccgttaatgccaggtatt
gcgcctgcctggggaatacgagcctctctacaaacttacggccaccatgcttaaagattc
ggtgacttcactaatgacctatacaagtaatgcggaggacgctgtcgcttattgctcttt
gctaaggccagttatgtccgtcagtcaacgatacgctgcggcggtgggtgacggcactag
accggaagcctgatgacaagttcgaatcaataGTCGCTTCCATTACACTTGCGCTATTCC
45|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 860 bp 45|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 860 bp
aaaagcatcactctaacgacgctaccgtctgaatagatcaagattgctatcggttcgacc aaaagcatcactctaacgacgctaccgtctgaatagatcaagattgctat
ttgatcgcatgtgaacccgcccaaaaacccgtctcgacaaaagttacgtcgcatgggctg
cgccaccggatagctcctagcttatcttataaatcaggtagagctacaacatggtgctat
gacaactggagtgtcatcgctttggcgaaaccgtaaagggtgggaattgctgcattctca
actgggccgaactattccgcattcggctgctcacaaatcgtggaatgtgtccttgaacgt
46|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 884 bp 46|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 884 bp
aagcctctacaggctctgcggtttggctttacttaacggtgagtcaggaaaacattactg aagcctctacaggctctgcggtttggctttacttaacggtgagtcaggaa
ctacgttcaccgtgttcagagatagagagtacattagggaccaatcacaacgttcgccag
ggcaccgcctaatccgcgttgttagcaagagtacaggctctcgtatactttcagaccctt
caatactagacgacaaattgcagcccggggtcatcggtcgactcagatacgtgctaacga
gtaccaggtctaccgttgcaacgttggatgcgttatactcggcataaggcgatgcccttt
47|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 888 bp 47|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 888 bp
gcgccttgaagaggcgaggtctaaaggcaaaaatttagatccgccctatgagacggccga gcgccttgaagaggcgaggtctaaaggcaaaaatttagatccgccctatg
cgcggagaattccctaaccactattgtcctctgcatcgatatcaggaataggcttacctg
caatctcttatggtgatagactgtttgggagctgaacctgagacgcgcacgaaatttgga
aggatcaaataggccccgcagtctctggtagacttctgccgagcggactagcttggctaa
ggtgtacaagcctaaatcgtttttcacatcaattttatagctgattatagaggaacgacg
48|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 588 bp 48|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 588 bp
catcaagatgggttacgtaggaccgagattcagtctctgggttagagccgacagcggggc catcaagatgggttacgtaggaccgagattcagtctctgggttagagccg
cgctacatagtacacggcgaggaatgcggggttgggctgaaccgtacacagtgggctagc
tgcggtacctgccaccggcatgcgtttaaatcctttcctttggcgaagccaactgccgac
gtccgcaacagagactcgttttccgaccccgttactaaatcagctaactggcgcctgaat
cctcttacgtcggatgttaattagtgtatagaatatcggagggttgagtgcgacgcgctt
49|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 626 bp 49|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 626 bp
taacctcagtctcgttcccccctcggtagttcggacccttattcgcttatctcacattca taacctcagtctcgttcccccctcggtagttcggacccttattcgcttat
tcactgtagaccaaggaccgggcatacttgcggatatctaccaggactaggcacttaggg
atacgctgttgaatacgggtttcgtcccgtgtactcaagtgtagtttaagataggtacga
gtgctagtacatcgtacaatttacaactgacttaaacgagagtttattatgtcttgttca
cttgttgacacgcctgggaaaataataaaaggcaacgtctaatctcagacccgttgatta
50|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 214 bp 50|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 214 bp
taactgtcggtcactgctcatcccgactagttcggctcactagacttactcgcggaagcg taactgtcggtcactgctcatcccgactagttcggctcactagacttact
agaagtaggacgtcgtgtaatactccaacgtcgttacgcaatgttgtaaaacttcatcgc
attccgtgcatggcctaaacgtgcagcattatataacgctctttggtcttaatatccatc
gcgggagtaacgcgaaggggagacgtgtgcctgaACGAACGCTAAACTAGGTACTAAGTC
GTGAAGCTCGGGTGGAGACAGGTAAACTGATCGCAACGTATCAACCAATTCTGGACCCTA
import logging import logging
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
def generate_sequences(n, mean, sd):
    """
    Generates random nucleotide sequences.

    The length of every sequence is drawn from a gaussian distribution. Each
    draw is rounded to the nearest integer and its absolute value is taken, so
    a negative draw produces a sequence of the mirrored positive length
    instead of an empty one.

    Args:
        n (int): Amount of sequences to generate.
        mean (int): mean length of sequence (gaussian distribution).
        sd (float): standard deviation of length of sequence (gaussian distribution).

    Returns:
        dict: maps each index 0..n-1 (int) to its random sequence, a str made
            of the characters A, T, C and G.
    """
    from random import gauss, choice

    LOG.info("Generating sequences.")
    nucleotides = ("A", "T", "C", "G")
    sequences = {}
    for index in range(n):
        # One gauss() draw per sequence, followed by one choice() per base:
        # the same order of random calls as before, so seeded runs stay
        # reproducible.
        length = abs(round(gauss(mean, sd)))
        sequences[index] = "".join(choice(nucleotides) for _ in range(length))
    return sequences
def read_in_fasta(file_path): def read_in_fasta(file_path):
''' """
This function reads in FASTA files. This function reads in FASTA files.
Args: Args:
...@@ -36,7 +11,7 @@ def read_in_fasta(file_path): ...@@ -36,7 +11,7 @@ def read_in_fasta(file_path):
Returns: Returns:
Dict: It returns a dictionary with sequences. Dict: It returns a dictionary with sequences.
''' """
LOG.info("Reading in FASTA files from destination.") LOG.info("Reading in FASTA files from destination.")
sequences = {} sequences = {}
f = open(file_path) f = open(file_path)
...@@ -52,7 +27,7 @@ def read_in_fasta(file_path): ...@@ -52,7 +27,7 @@ def read_in_fasta(file_path):
return sequences return sequences
def read_sequence(seq, read_length): def read_sequence(seq, read_length):
''' """
This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is
smaller then the requested length or cuts the sequence if its longer. smaller then the requested length or cuts the sequence if its longer.
...@@ -63,12 +38,11 @@ def read_sequence(seq, read_length): ...@@ -63,12 +38,11 @@ def read_sequence(seq, read_length):
Returns: Returns:
str: returns sequenced element str: returns sequenced element
''' """
from random import choice from random import choice
bases = ["A", "T", "C", "G"] bases = ["A", "T", "C", "G"]
sequenced = '' sequenced = ''
if read_length >= len(seq): if read_length > len(seq):
for nt in range(len(seq)): for nt in range(len(seq)):
sequenced += seq[nt] sequenced += seq[nt]
for nt in range(len(seq), read_length): for nt in range(len(seq), read_length):
...@@ -94,10 +68,10 @@ def simulate_sequencing(sequences, read_length): ...@@ -94,10 +68,10 @@ def simulate_sequencing(sequences, read_length):
results = {} results = {}
for index, key in enumerate(sequences): for index, key in enumerate(sequences):
results[key] = read_sequence(sequences[key], read_length=read_length) results[key] = read_sequence(sequences[key], read_length=read_length)
LOG.info("Sequencing was successfully executed.")
return results return results
import random
def generate_sequences(n, mean, sd): def generate_sequences(n, mean, sd):
""" """
Generates random sequences. Generates random sequences.
...@@ -110,16 +84,17 @@ def generate_sequences(n, mean, sd): ...@@ -110,16 +84,17 @@ def generate_sequences(n, mean, sd):
Returns: Returns:
dict: of n sequences dict: of n sequences
""" """
from random import choice, gauss
LOG.info("Generating random sequences.") LOG.info("Generating random sequences.")
dict1 = {} dict = {}
for i in range(n): for i in range(n):
keys = range(n)
seq = "" seq = ""
nt = ["A", "T", "C", "G"] nt = ["A", "T", "C", "G"]
for value in range(round(random.gauss(mean, sd))): for value in range(abs(round(gauss(mean, sd)))):
seq = seq + random.choice(nt) seq = seq + choice(nt)
dict1[keys[i]] = seq key = str(i) + ': length ' + str(len(seq)) + ' nt'
return dict1 dict[key] = seq
return dict
def write_fasta(sequences, file_path): def write_fasta(sequences, file_path):
""" """
...@@ -138,8 +113,8 @@ def write_fasta(sequences, file_path): ...@@ -138,8 +113,8 @@ def write_fasta(sequences, file_path):
outfile.write(key + "\n") outfile.write(key + "\n")
outfile.write("\n".join(wrap(value, 60))) outfile.write("\n".join(wrap(value, 60)))
outfile.write("\n") outfile.write("\n")
LOG.info("Sequencing was successfully executed.")
class read_sequencer: class ReadSequencer:
def __init__(self): def __init__(self):
self.sequences = {} self.sequences = {}
self.reads = {} self.reads = {}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment