Skip to content
Snippets Groups Projects
Commit eacf358c authored by Christoph Harmel's avatar Christoph Harmel
Browse files

Merge branch 'simulate_sequencing' into 'main'

feature added: Simulate sequencing upper level function, restructuring of package modules and functions

See merge request !10
parents 4c9976aa b68f9e5c
No related branches found
No related tags found
1 merge request!10feature added: Simulate sequencing upper level function, restructuring of package modules and functions
.DS_Store
.idea/
__pycache__/
import argparse
from modules import run_read_sequencer
parser = argparse.ArgumentParser(prog= 'read_sequencer', description='Simulates Sequenceing of a FASTA file.')
parser.add_argument('--file_path',
help='path to FASTA file', action='store_const')
parser = argparse.ArgumentParser(prog='read_sequencer',
description='Simulates sequencing of a DNA sequences specified by an FASTA file.')
parser.add_argument('--input_file_path',
help='path to FASTA file')
parser.add_argument('--output_file_path',
help='path to FASTA file')
parser.add_argument('--read_length',
help='read length for sequencing', action='store_const')
help='read length for sequencing',
type=int)
args = parser.parse_args()
print(args.file_path, args.read_length)
def main():
run_read_sequencer(args.input_file_path, args.read_length, args.output_file_path)
if __name__ == '__main__':
main()
>1|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 481 bp tgagcactcggtgccaagggcggggatacacagatggttggctgatacaaccgggacttaaattccctagactagatctgtgttggaacgcctctctacgagaaggcgaacgaactggcgccgaggcgatcgctaacatcttcgtctcgcttgaaccacacaatggatgattcctccctaggggtttgacaatcaacctggatagcgtttaatatagatggctggttgatttgtaaggccttcacagactactcagagcaataagtgaccccccaacaatcagaggctgatcctctgctccgaaggcagcactcatcatcggtattctgttcgctagaacagatggaatgcatgcgccccgctaagtttgattgagttaaacttattgtcttatagccatagccaaggtatctatggtgtcgtacgtgacagttccgtgtaggcatgccatcccgcctcatgcgtcatgtcatactgaggc >2|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 495 bp ctgaatcaggtgtaggttctttttacgtcgtttaaggagctacacggtatcttgttttcagttaaggtgccacacccccgggtggatcatccgtcagctttcctacaattaggtaactggcgggatcatttagtcttgtattaagacgctcgcgcccggggcggccggcttgtttgtggagagaaacaacaagtctgagtatagattaaatacaactggtttactggcaagtcagcgcgtaacaaccggtgagccgctgcgcatgcttactgcaatgaacatcttggcacgatcctgcgatagcgtgccctgacccgtgcacctcgtcggtgaatttcgtcgaacaagcggatcgccacgccacgtgagatcaagccaaaacacaaaaaccacaggcaatagcgacgctgaagtgtcatttcctacctaaaacttcggttcttcttcttaagagggcattaagtccggataactactgcatggcacctgtgtatg >3|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 193 bp acttcagtactggaaggatctaggaaccattaatgcgagtgtggtgacgccagacgacccccggtgttctgccaccttctttggataggagaaccgtcactcgccccggaggccccacggataagaagggtatcttgtgatcacgcgaatgactcacttgcgtaagtaatctaactttgtttttcgctataaa >4|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 625 bp acgtctggagcgtgggttgacccctgtacatggttctttccggatccttaacgtgccgatacaactcaaaggtaactgtgcttaccacttccgaagctacatgcctctaacaaagtactttcgaggaggcactcaacccccggagatgctttgcgcggaagcagagatcgctgctcaaaatttggaatcactttcgtgcgagacccaaacaatttatggtggattcaagcgaacgagtcatgattacagatctatcaatcgaggagaggacggcttcgccgtttccttttaatgtgaaactagagccttcctcaatagtgaggcccttgcccggtgcccgggattggctcaaaagtaccggctcaggaagtctctcaactgccaagttggttaaagtagcttcggcgtaaggaacccgaccgaccatcagtgtcatacaaggaaatatttccaggaacctagtttatcttgaagttctgaatgagttaggtaatgtcggcccttcatgacagggggacgtgctcgcgagctcaaaccaagggggctaagggcgacggtgcttagtcatctactagatccacccacatgacgattgtgtgggtcctttcagccttttacttctcgc >5|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 845 bp agagcgtacggcgcgcatcgtataccctacgagggcggcgtgtggaggaacgctgggctgacactgtagaagattagatacacttgtccctaaaattaacccttaaccgctattagccgtgaacgcttcctaatatttcaagccgtatagctaagtggagaatgtggagccctggtcaaatcacgagccaattagccctagacggacagcacatctcgtcgcgttaagcggaacactcagcttttattacctagtgctcagcctggtttccatatgctctaaccgaactgatgcatacttgggtctgactaagggccatggttcgcgtcaagcaggccgggttcagaagccctggttgaggggcggatactccagtcgcccgcaggtcgcgatccctgggtttaaactacttcacgacacgtacaaacacttactacctacccctacaacactgagtgaaaagtgctagctaagtatgtccgtcgagataggactcgactttagaggtctgcacgaatgtcctggtaggatgcccaccaaagggaataatccttcgggtacgttttgggcaggtgtgacgtgagaaatccgcacccttatcgccgtaaaggttatttgcggggtgcgtggttacttgagttgtccctctcgagcggggtaacaaaaacctactgtatatctagtctggtccgaaattctttatgctgccgacatgctgcgacccgactacgtgtgtaagagtcattatttcttaatagtttaacaaggctacacccctatatgataaaggctattctccatagtgtaagagtctgttgcaccgacaacccggcgtctggctacat >6|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 703 bp tgcagtcgatgtgctattcgttttaggcagtctacgcgcttagtaactcccacggccatagacttatctcagacatggaccatgtcgatatcggacgccgtcttaccacatttttcatagcccttcataaggcagcgtgctcttactgcccaataaggtggacgattccgaccctaggcgaaccagcgctatagatggaccttctaattgatgcgcaacgtgattgtttccttggtctgggttagcatttcggtagcctaacagtcactccagttcgctaactggcctggatgagggccccatactatatggtgatagcaacgacaccccagtgtattgacctgttgtgtcctggtgatgttgaacgtcaccaagatagtctctatgtgactccatagctaaggagggtgacgtgatgcgccggccccgccccagacactgctacagaaagcttaaggcgagcgtatgaagagcctttgggcatacactctcgtatctagctaggtcaaggtgacggaatgaatctgctatatctagattggcacgcgataatctaggccaattgctggtaaaacacatggtcttattgtatacccaatcccgatttgaatttcctgcaacgaggcagctcgcagaggaacttaagtagagtgaaccctggagccgaatcccagagtcgtcggggacaaagtatatgcaacgg >7|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 243 bp actctttagaatgggtttcactaatagtacgtgcatacaatttcgtcagaaagggcgcttgctaagggacacggatcaatgatgaccagacttatggtgtcaggtctcactatattacatatccggaacccgtgcccgcaccacgcgctgggtctaggcgaccggtgcatcatctccgcgtctctagaggattctctcggtaaatgctgaattgcgtgagatcaaatccgtatgccagtcatg >8|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 863 bp attggcccggtccaggacagagccttatattgctactggtatgagaaccgttctgacgtaaacttgatggctttacgcctgcacgggcttcatacacacatgaccgtggacaaagtcgcccaggccctcgaatagggtgtaatggttaacggttagtgccaccccaatgggtgcgaggcagtaagagtgtcctatggcaaaactctcctcgtttcagaagggtcgctcctctagcctccttatcccccctataatagtactcgccgggtacgagccggagctccctcgagaagtcatcctgctcttaccacaggcagagaacgcgcaaggtttagatactaacttcattcatccaccagctggacaaggaactatagagagatgacattaggttatagctgaggggcgatcccattacccgaggctcgcaatagccttctactctgccaatgatcagtattgtaaacatggctggcgtccctaaatacaaagtcccgctgcaattgatggacttagacgaatcactagcaaagtcgataaatgtttacgctatccaatcccgcggttttaaaggtctgtactatacattcaatacggggggagatgtgtattgagccactagaggtatctaatgggggattgaaaaccgttttatagtcagtctaagcagcgccccttatgtcgctgtgatcggcagggttttttccgagatgtaaggcgaccgatatttcgttcttggcttggaagtagtacccgcgatgacctaccaagtagtccgacccattgatagactatggataccgctcccctccggtcacgaagcaccttagtgaggcatccgccgtgatgtgttgtttggagtg >9|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 494 bp aagcgaaactcctagaacttcccatcaggcaatcgtgtcccacgaagcacggatactacgggcactagttgaatggggggtttttttcgtaggtcgtaataggtactcggatagtcggcccagagttatgcttaagaatgcgctgcttaattcaatgtgactgccgttgtctccgatcagatccaggtgatgattgcgatcgcagcgacatatgtctcgaaagacgtgtcgtgaataagcctgtaagcccaatgcaacatggttccctcaccttgtagctgatgtaccgtgtttcaatctccgcggctatcgatcgccctttcatgcaagctgtaaccagacaggaatctgccctgccatcgttatgtatgcgtattacgactgattcgcgcaggcatcccggctaaggccactgggtataatccacagacattgcacgtcatatagaaaacgacctgtgttcacaatcagcccggggggagtcagagtagtat >10|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 86 bp atcctagcgccaaagatttactgttatggggtcgacgaacactagccgataatgccgtcctgggatctctagcctagtattatgcg >11|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 360 bp cgcctgagggtcctaaatctgacgtatgatcgaagagattggaaggtcccggcgggtcaccccacgttgcgatcatggccaaggccatggtttgctcaaaaatcccacattcgccgtcttacgcgttaggacctcactatcccacagacggtgcgttaccttgtagttgacgcgggatcgtggtgataacagctatttccgagacttcatattcttttacatagcggcttaccgtagtgactccatacattatttgcctattttgtagtgccccgaacagtaaggggaagccaactgccgcggtagctcatagacagacgtggtatacacgctacaaataagcagtggattgagacatga >12|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 140 bp gaattcctggggatttactcacccccgaggcggacaagatttccagctggatcaccgagggttacttaatcccttcgatgctttcaaaggccctaatcagtattgagcaacgaaagcggagtcgttagtgtccaagttgc >13|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 832 bp aatactctcgttgaagcgtcggacagtaaagtgagagatttcggcccacggtagtcggacattctcagtggggagcgaagagttgcgcttagagccgacgtacacgatataacctcaattgaaaatcgctatgtgcatcgttagggcctccggcgtgctgtttcggcagctgagtgtgagggtataacttaccttcgacccgaattgtctcgcggaaatcctaggcaagtaatccacttttggtacgggggagctagttcctctaagacgaacaagtgcactcttcacgtatagtgccctacagttgcgctgttcatggaatccgactaatagaccagtcccgaccccagtgcttcgactgttacaacagttatcgtcgcgcttcgggacgaaatctcggcattactatactcgacatacacagaaagctatggaggtcgccgtaatattcacctcgtcgagtctgtaggcgtagtaaacgttacataatagctaatgggactttcgaacggaacattatactcatcgtgaaacgtttggtcaccacactgtagaatccacctggatcggtgctagttctagtcattatgccctctaatactggtcgcgtagcagggcgcaaggtacagtgcgataccggaataggggtccacacctcacgtccacgtacctatagtcacgccgtatcgaattcattgcactccttttaagtattgagcgacgactggcacccggaatagttagtttgttcggaacgcccccacgcacgagtcgtgcgaattgaatgccgttagaacttggtgcatgcgccgtcgctactattaacggacag >14|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 296 bp atcggggtgcgaaatcccctgagctggttgactacatacgtaaccacgttccgtgcgtcatctaagcgtatcggctcatactggtggtaactagacttggtgaaccctaggtgccggcatatcgaggtccgcatccaaaataactatcgctatagctacatagacatttactcgcaatattacacgaaccgtacgtccctcggtattaacgtaatggttaaagtctctaattccgctgcagagcggcgggataaagacgccggtgtggcctgaatggtggatctgtccgtagtacc >15|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 515 bp accttcaatttgttcgcccgggacaagtagaaattactgtaaactaaacttaacctattccttgttaaagtccgcaccaagtgtactgtaagaatggtcgctcgtaataataacgagaagatcctcgagccgtggtctgctgcaactaccttgagcggtacatcgatgtcccactctgggcggggatcaggggcgagacttgtggtgaggccaaagaatggcgcatatgtaggcaccatacgtcgatacgttccaggagtagaggcctcgaacatacaccacgataagtctacagacgcatagatgacgtggaactcgcatctaacccctggaattctctgactgtcaccctgagccttggtgggccaccgttagggacgtgcaactaccctttctggtgcgaatctcgtttccttagtgccttcacacaaggaggccaattgtttgctaccccatccagacatttgggcattcttgtgagtcccgtaagtcggtcatgatgggtttaagttagt >16|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 820 bp ccggctcaatcctgtagaaccgcgtacaacacacccaagctataccgcacacggcgccttagcaaccactgcttatctgcgtattatacctttacaatcattacatttgatctatctgtgtaccggttttttttgattcaattcgctggattacgacctcccggccaaaaattctcaattcatcgttaacagacgtatttgaagataatcattcaacgtgaactagcacttggtcacttggtacgccaaccaagctgtgctttggggcaaccctttataactcacatgccgtcctaggactttacctagtccacctagcgtgttacagataccgattgcatcaagtcctcgacggaccgcactcgtcgcagttaaaggcaggtctatcagggagataccgtgtgttgcgccaattaatcttagaaaattatgcttgcaatagagcaggaaccctgaaagaatatggttctgtgaaaaagcgtgcactcccgtgtgtgctcgttggttactagcaccgacgttgggtgggcggaggggtcatatgcctgcgccgatcgatatagcgacgctcgtatcaagttgtactgcggcaacacgggtggctcggtgtagaaataaaacgggtgccctcgggtcgaagcgaagcgcaaatgctcgtgggccggagcggccgggggcgggccactatgggcattacaagccagacaggtaagagggagtgctttgatgtgaacagcaccccttccgctggacggggttatgacaagttcgccaagattttacaatatccttaacagtggaatgtccttaccgggtttac >17|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 791 bp attgttagggcctgtccggaaaagatcaacggaagatattcaccagcacctatgctgactcacgtagttcccgacgttcagtcccctccaacgtggaaggtaggacccatctccttaacgggatcgatcggtcttcctgtgaaagttgctcagagtcctcaaggacgtttttgggtgcgtgtacggtatggttatggtacgtgtctgtgacagagggtattcttactggttaagtgacccatatgaccacctgacgcccgagcatagacctgtaggggtcgacgcgagagatggcagcttttgtcatatcatcggttcatgtcaaggttggaggaattcaggcatacacaatctcggcttagtctgcgctgctcctgtccatacctggcacttggagtcaatggattcccaaaaccgacaaatgagtcaacgctctactttttgtttgctggaacgaggcaatatccattgattcccttctcaacaaatgttatcgcggcaggaggacacaccggggccgcccgggcatcgattcgtaaccgcctgaatgtgatacaaccgataatccacttgtaagaaaatgtattctagggacttggcaccgtacggtatagtagctaatctatctaccgctagctcccggcgaaacatctggcggctaacgaccgcacacgtccaagtaattatcggcactgcgatctgagggatcatctcgcggacggagattaactagttaagaatacctatctccatcgcaatgcgactgtagccaatagctatttaaggcgtgt >18|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 328 bp accgattacaggcagtcggccttgtccgctcgtatatccagggatgttccaccgaaagtgggagtgtggcacttattggtaaaaggcatttttacgaacgacactgataggattgatcactcaagaaatgttctcgaccctgaggtaggagtcttaacagacggacatcctccgtagatacgtgagaattaagggacgcatgtcgaaaacgcttggaatctactgtagtggcccaccttacgcttcttccaataactcccttcatagtccggcaacctcggtgggggtttcccttaggcctcggtgattgctagaacctccgcgcaaa >19|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp ggagtggaaaattctgtagtccgttggcggcgaccgcaaaccagaataatatggtcacgttaggccctcgggccccttcatatgtacggagtcattgaattagcattatactaccgttacgcaagaccctatcccatccgcgactgtcaccactgctgtaaggttgcaaggctgtttcaatgtaaagtaggcgaattctgacgtgggctgataacgaatcccccgggttatctagtgcaagtgctatcc >20|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 440 bp atcttaaacagcccaatcggctcgccgaccaatttcccgcttcacagtacgcggaagaatctgcagatagaagtcagccctctcacgtcaataggaatgctgcccgtcatgtttaactactcaagttttaaggtgtcccttatcggttccaggatcatgtctgaaggaagatggtcgcaacgaaatctggagtggcatacatcgttcggtcgaagcataatctcagacgttatctataaagttagggcgctgtatggattgggattcaagctcgaagcctgttcctgccatacagcgccttagttaggatcacgcctgaaacgtcacgacggtgctaagcatcatggggcgtggcccggacgattatccatccgctacttgcgtatggtggtgtcacccaaaatatatgtcgcgaagagtgtccgtgtcatgctaccgag >21|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 840 bp cataactcgtgagtggccctgtacaagtcattgcatcacaatccttgcaatttgctcctttggccaagcgtacaagaccccggacccatacgctcccggctgataaactgctacagcatggtatatccggatgatgcccctgaaaactgcggaagtcaatttgttgatgaatccccgactttccgctgttcctgtggatggtcgaatgccaaatgaagagctgctccccccttctttaatatcaagcactacaaagataaagcctgtttggctgacggcgagccctcccctatcgtacgcaggatagatctggccaagtccgctgacgatggggtccacactgccagaagcgtagatctttgttgagtcggaccggaagagctaacctagctaagggtgtagagttttcaggagcttagagtcatgtcggattatggttggcgttacggacgggctccaaacgatcaaactctagtggccactttcatggccagaacggaaagagcggcgatgtctgccaagtaagaccttcactaccttccgttgattacagacgtcggtttgacagcttggggtcttatccggcgtttcagagaacttttggagcactgagcgcagacaccgacaagcttagctagacagctgaaccgtatcacttttgaaaccagagaaaacgcatagggtggttgaggtaccagaaggtgtggtttctaagttggaaaccacgtacatcactccttagatctccgaaagcgtcttcgcgtgttcggactccacatctatgcgtttactagcaagcggtttctgaccaatatgcctatgatatatcttaggtcggga >22|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 234 bp caccgcgaaagtgactcagttttcccggtcttatcacggtcgttgtcgtccagattccggttgttaactgcgggagctataacacttattccttactgcgacggctgatccactaagaacagttcatagagctcggctatataatttgaagacatagattccacggtacttgtagcccataaccgctgaggaggaacgtccaacggttcgcgcggagcatgtgacgcttaaagg >23|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 917 bp atcaagtgattacctggtaacccgccgctcttgcagtgttcaccctttgtgtcgtcttagtgtttgtacacgttaaggaaaagcgttagcttaaccattacgccccccaaagcccggtgtgtagttatctacatgccgtgtcaaagcggtgactaaatgtttatcaagttctgatgacaacgtgagctcttaaagccattgactagtataagcacggaacaatgataccaggcaagcttgaatataggataaggcctctaagctcgaagcggatcttacggaggtgtgaatcaacagcactcgagtagtacaccgtggatggttagtgaagttggtggtaaaagagtaaagggttctaacaccttaacaatgcgctacacttcaccatagccgagagtcagtatgtggtaccgttagttctttcaatcccaagagcgcataactgcttgccgccgcttagtttagggacattaatgtatatgatgaggggatgctcccttcattcggaccgaccccgacacatcgtatcctaatggctaaccgctcgcagccccctgcctgcatgcggtccgccgagcagtcgaccaagcactgtgaaagaatttgggaaatcgaacccagactaccggaacttaactcacgaccttcgatcttctgatcccagtttccatacttatatatactgcgcgaactgcagcggactttcctccggcccagcctatcgagctttgcgattgtattcgaggggcgtcggtatcttttatccaacagctgtcatcagttcggaccggggggataaaagcgccagactggatctggggtggtgtcaaccatgtgagtcaatcgtccccgagtgccgagggcctagggttcctacatggtgggtgctcctgtcgaaagaatcgagtgacgactcg >24|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 676 bp cacacggcatcgcaaagcgagctatccagagatgatacatgtggttgaaggtgattgcgtcaacatgggggttgctcagtttggttggtcaatcaacggtggcagaccatgcgataacgatgatggtaagactgtaaggtaagttaaatactctcgtctgccagttgggtcgtcaacgctgcagagacgccattcttcccagaaggtccgagctttctacagtgccgcggcgtcatgaccaaaggggtccaacctcgcagtaaaatgtctatgcttctggtttggaatgagaccgggccatcccgtgataaagagcttcatttatcagggaaagcgtcgcgtagctctagaatttatttatcttgagtcaaatgccatcatctaatgaatccactgagctggtaaggcctaggcaggcacggaggactttatagtccacgaattcgggcatccgcattatcttgttcgtccgcacttaacgactccatacccgaccctgttactctatcgagtacactacggttaaccgggcgtcattgtccacaggttcacagcaacattgggcgaagaggagtgctaactaagcgccatgccccattctaagtttacaacaaagtatttccaagcggacggtccgtgtttcagcctcatcttgcccccctggacattccgatgg >25|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 870 bp ctgatcaccaatagcttgcgcttaacacacgcgccttacaattatatgacgcccttgccaatgacagatagagccattaatcgtggaaaccaggcatttatacttgtccgatgtatcgattctcctctatctacagagcccggacatgcgaaatatcaaaattccatgtatactgaataaatacattgggcaagccgggctcatgcagcaatcccagcgttgccttacgcaaagatatcttacggagttgcctttagattaacagcacgtgttcaaaaacctagccaactctgtcggtctagggcggaacgaagtagccagagtcgccccacgcagttcacgattacagtaatccccttatggttggggcatcgggaaattaaccctaagatgcgccccttgagcgccgaaaagggatcagttcagagtttccccccattcattgcaaggcactgttcaggcgctaacatgaggcccaaaaactagctggttacttcctgcgtcgcgcaactgttcatgtgttctttccgtacctgtgccaaagtccatgttgaggtacacccttgggtgtgttagaaagtggcttgccctcatagctgctatgggaaatttgagttgcgaccgtcgggcctcagggcgccaggtttggctagtaggcggcgtcttgtgctgcgtcaactgcgaaatgatgtccggtaggcttttatgggtcgttgcggccatgcaagagcatgcggccggttgtcggttacagagtcttagatactgtcaaactcgtacacaataaagagaggtactaatgaatcatgggcagcgcgttcatagtatgtgaaacttggcaatcagtgcacggttggccccacggtccta >26|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 751 bp gggtgcgttatggggactaaagactgttactaccggtactccgccttatagagccgtcacgtattaatcagctatcaacagatactatcgtcacagccctccttctggcgaaggatctgagcatttgcaaagctataagttggtacgcaacggtagagggcttcgtagtcggggaaagggcttgcagtagtataggccgtaacttatctgttgcaacctcaaccgcacgaatcgattactctataactgccctcaatacagtatggttaccagtcaccttcacactgaagattaattcgcctacagaaggagaacatctaggtctccgtagaatagcagtcgtgacaacacgccgaaacttgaggcaagctcaggcgtgtgtagcgagctttcagcttaggcgggcattacctaattgttacggaccccccaaaaattgtcgactggtttcttctatgcgactataaaacaggaataggaaagtgggtgcatgcaacttgttcgtgtaccgttatgatcgattcctatgtgggagtttgcgcccacctcgtctgtgctgtcccggcgtactgcaccacgctgatttatcttgtagtaaggatggggtcaatacgagggctgaggcgtagagcccgacgggaaacacattcgtcacgccgcaaatcgcgtcgtcctcagacctcagcaagaccttttggtcagaatatggcggggctctaattgcctttacttccactccgtgaacttccgc >27|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 574 bp gtactgcaccttgcactgctatctacaatgccgagggtcgccctagtgctttgcatgtttggcctctacctacgagtctacgcgggcgtttttaagcaagctacgatcatcttgatccaagggtacgaggccccgcagaccaatggaggtcgtgaccaccctcgtgtatgcctcgcactaagcgagcattctggtatactgtctctctcctgtgataataacagtcggctcgatattcagttcacatgaaacagtatgttatataggtgggatggttataacacggaaaggtgaaaaagagtgcggaagttacttaggagtgccgtccttgatcaagcatgcgtagcaacaagcgcccgtaacaaccggatggaggttctgggtgaacaagggcgcccctacaggatatacaggacttgccctatggtccatttatagtatggtggtataccccggctcacctgtgaattagattgcgaaccaaataaaggatcatcgggttcacatttaggttagagccctcatacgttaacctgccggtacaccacttctttcgccgccgcatagtacatgc >28|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 169 bp agctccctaaacaacacccgcgtaaaaccttcagttatggtgccgactaaccctgtggatgtcttagcgctctcgttccgatgggtgctgatactagtaaatgagactcgagaccgagaacacgcaacggctacaacctggtcggttgttggggtttttataatcagtg >29|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 408 bp tgcagtgatgcatcgataagaccgcatagttacctccttacaggtgacgctaggctaattgggagtgctggcacttgtgccctacagtcaagcgctcacgcggtgttctcctcccgcaatcttagatattaggctctgtaccgcacgaaggatgaattttcttgactattggtccctgtttacgagggcttacctagagtgaggatgaacataaacaaggcctacttgacttaaggcttccaaatcacttgagggcaaatgactcctcaaacgcgagtgccagtactatccgtgagggaagaaaatctgccaaaaggccttggccgagatagattccgccgcccagcttacggggcatggtctataagttccgttttagcatgtactgtcaacaagcctgcgggatcc >30|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 52 bp caaagcgattcgggttaacgcacttaagagttcgacgtaggttagtcccctc >31|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 581 bp tgctctgacgtgtaagcgccttcgataacgtctttgcagcgccccacaaagtaaggaccggtctaacagggcttccgaatcaatagactgatagtaatgggatcctgaggctgggacccgacacacggcatattttactagaaacgctgatttaaactccaattatccttgacgcactgagccacagtcttagacgcagaatgtccgcaggagccctgtctttcccctaaatcattcgcggcatttgtttacgggttaagtcctgcggatcctagagtctgggccccgtacaaccaggaagagactgatactccgcgtattacggcccataagaacggtgggcctcgtttgtatttgactactgtacactcctgcctactgctgaacttaatgatgcgagatgaaagtcacagggggtgtagatcaagttgcacttaggtttcttccgatagattactatgcaagctatccacatgtgagaagctatagccacctctcttaacttctggtaagggcgcattgcggagcggcgagtacatatggtcttaatggacgccggtcgccgtccagatggagatagc >32|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp gcggaactacctctctaagaccgcacaacaagtgtagtagatgaagatcacgcagagtgctcggcactgcatttttatacgtcgaatcagaaacgaggttcctcctctaggcttgttaaaaatccgggcgcgatgggctggtaatctgtggccatgggagcctcgccatttaaagattttggttaaggctcctctgttgtgtccatcacccttgaacgagcccgtacaaaccgtgtacgatgttgacac >33|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 297 bp gccctcgcgccagcttacttttagaaaacatcgaccggtaagagatacctgggtgagctgggcttcacgacatgttcttaaatcaatactctaaatctgctttgtagcatgcctcaagtaaaaaaatgtgctggttccgcacaggtgtgacgattaacgttgcgcccgtttgcgtcagtccagatcaccgatcttccacaccaccggtgggctgccggactgcaggtaatgactcctggctgcattctctgacataaaggttgaatagaacggcgtccttgagaaggttatggaacg >34|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 573 bp gcctaggggtcttgaccacagggagtacgagcattgatcattggagcaggtggctaatattgatagtggttagaccaccggcgcatcatcgtacgagcgcgggcgatacgtgtctttcaccggcgcactaatcttatcttacttctcaagccccgacagcatgtacgccaagtgttgttctgatgaaactttcgaaatagcaactgttagtcagttatagttggggagggcagtgaatacctcaaatacacccaagaaataacttcgaagcggcgcctatatcacacccctgtttcttatgactggtttgcgtgtgctaacagcaatcaagtacctgaccgtatgtccttgaagcttgaggatagtacccggatccagaggactgaaaaccgtgtctacgctgttctcacgccgatgtttgaaataatgagtgtagcgtctgccaaactggcttaagcactcagcgtgaggcgagattctatggccttgcgttttcgtttcgcgcgaacggtgacaatccagaaccccgaccttaaatatgcgacgtaaccctcctggccccgtccgagtgaa >35|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 559 bp gaaaaagtcgccccattcagttacaatcgtcttcagaagccagctcggttggggctatctgcggggtaatgcaacagggggctaccagacggtaaaccagggtcttgctattggtgttacgaaacaaaggagctatgcgacctcattagatcgagattactctcacaggcagctccggccatagcacaactaatttcgggtgtggagctcaccacaggaacatcttgtgcgtcctttgttatttaattgtgcattgtaatgcaccggaccccgggaacatacagccattatctgtgttgccgctcatccgttgtacttcttaatacaatcagaattgtactcaaccgattgccaagcacgtacgcgtcagatacacatccggagtcagtcctcgtcctgctttgactcatgccaaggagtgcgcttcgcgcgggtgaatctcgttatcgatttatagtatttatcttatcgcggaagaccacgctagtagactgggtaacgtcgcgattgtcccaaagccagagtgaagataggcgacatcctctgtgagaggggtacc >36|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 187 bp ctaagtccttatctatgatgcatctttcgttactgcgacaatatccgagacgagcagagttacacgccgaggtgtaaacgaatacgattgctatatgcaacgagttggttacacgcgtgaaggcgaatgtggatgctgcacttggagtcccattttaccggccgcacgtgctagctcactcaccttg >37|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 549 bp ttcatatggggatttggaatcgggtttgtgcggaatatgcccacgagactgcttatgtcaacgagacgacccattgtcacgttgtaaggccaccaataacacacaggtcttcgtttgctgtctcagggcaatcgcatcgacaacatcgtatggataccgttttttatcagcttacggcgcatcatactaataaggtgtttgagagggcgcagactcgaagcagtgtgatcttcccggttcgaagatgcaaaaacggtcctatttcgatccaaaactcagcgcactagtccaatgcttttttggagggttttgtagaacaatcgaggcgcggagcagcgaaatagaaaacgggccagtgaacgacggatccacacggaggtttcactcgaggacgtgtgccccaacaaccggttatctccttacttattcaagtgtgcctgcacctcgataagtctaaactccgcctatcccagcgtaggtcatagtcgtaactcccaacaggtgccctgcgacttgttcctcgccggtgctgcaagaaggtagtgttct >38|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 916 bp gtggcctaccataaatcaatttgggttaacgctctttgatctacgcactatgttgattcacttaccccttgtcaccgggcagaagagagccagtttaggtgtggttgtatttgccaaaccgcaaaccgcctaatgagctggatccggccatggaattaatcccgtcgtttgactcgaggtgttcaaagactgtgcaacacgacgtgcattcatcactagaacttaatctagaccaggccttgtggccaggagaggcgacgtgatattgccctatacacagataattatatacccctcgcgcgcaaaccatctcggtctctttccaaggtgccagcacgcgataactcgtatctgggctggatgtgcgtttcccttagcccactcccccttttaagtactagcgtactcgggttctacggagtgcatggagtttccacaaagggacgcaacataatttaaagaaccgagccttacgaggagcttttgcaggcttccgtcgctatccgtcgtcatggagtgaggctttgaggaacgagcacttgggactctatataccccggagtaagtatctacagccggggtctgacgccaaccatttgttactttgttgcgagggctactcccgctagtagtagaactgctgtcaggcaacgacactaattaagtggccttgacccgtacgacttgagaatcttcggttcaatattccccgtctcgaaaggctgcttcaagtgcgctacacattacatacaactaggcgggagggctccaaaccggcggagctacaaggagtctaatagtgcgaaaaaagggccgcgcgacaaatgagtctggtgtggcattcggaacgagtgggaacgatccgaatccgctttgacaattaaccgtcctaattaggatttcgtgaatagtagtg >39|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 848 bp accccggtcgctttggccggtcgtagccctaatcaattctgttcgtatcactaaagtaacggtttgaaatcctttgcaaacttgatctgggtatatgaaccggtatgcggggatagtggtaaataagtagtttacgagctgagcgtggattatcccagagaagttgccttaggtccagagcccgcacctacaatcactcgaggccggtcgagcgttgcgtggcaaggaaacccagccggtcaccctaccctcaaactcacgtcattgatccaatcatacatggcgtctctcacggtggtgttgtttgctgtttcttgcggcccgtttattcgtgaacacgacgcaagccctaccgacctcgctagccgatctagacgactgggtgggttacccttcccagaggagtgactatggatatgtagtccttataggcatccagggcaccggatgcactagtcacacccctgctcagatagcgccaaaaagtgaattcaagcgttcagctggacacccattaaacacgagtgctactgggcttacataatacgagagaagattggccgattgttgcccttagaacttatgtgaggtaagtctgagacgccgattgcggcctagacattagtaaaaataagataagaactactcccactgactcgttcgggcctcctagagctagggcccccctgagcatgttcagttatatcctacgggctagcgtaaggttttttcgtttatgcgaggcttgagacgcgacatacgagcattccgttgctggggcgatagaccacgatacgctcagaagagggaatactaaattgataaaatgctttcattgtctagcacct >40|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 289 bp agagcaaaagaaagtctgctccgcgtgacacacttgctcgttgtagtaactgcacgcgccgtctactcgacagggaccccccgtcggttcctctctatagcaatcgcggaagtggttccctgcctcccgcgcagaagttcaaactagtaatccttaatgacttgtggggggggagatcagtttcttccacaatggagtaaacttatgcgagaatcaagatcgcagaggccattttttgatgatactgtcagatatgtggttagccgtatcacgttaccgacgcagaatt >41|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 642 bp ctggggaatgaccgtaccgatctaattccccgtcgaaaaacttatgacgcgcagttgtccttatgcttgagacatgaatccttgccccatattggcgatcttggccaatgagatctgtcgaaagtactggaggccggtaaattgggggctctagaggtccgcccctgaaggactaacgtgtgtgtgtgtctacgtgtcgggttatcagcgtgttggacgatggccgtggattcaacgcatgctagagagctaatgatcctccgaagtcaaaagcctcagtgcttcgatttatgagcgcgtggcgagtacgtctagtgatactctaactactataaacaaggcctcgtcgcagaatccttcaatacattgggccccgggagataagtcggaccaggactaaattacacatgggggccctaaccctaggtcttaacggatggcttgataaagcacgcgctaatcttccctatcgtgcacatatctcttgctaccccctgctaaatcgctcgggccttggcctacacaatatgcaactggaagtgtggtttcgcaagggtataaataactgaactgtgaagttcccctgttacaaagccattagccgctaatgaacatagacctaacggactcgctccttgtgct >42|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 993 bp gcaagaagccaaaaaccttgcaggaggtcatttaagtttacccgcgcataagcagagacggacctctctgagatctcgcaccgcgcgcccggccggcactatcgatgctagactagggttggtgactagcccgtcaaaaccagcctaaacgcaaagattgtaggcgctagtccggaactgactgcttcgtgtcggtgggagcctagtatgtttccgggtctatgacccctaaaatcatagacgtgtcttaatagctatacctgacttactttgaagtacttgccacgacgagtttatgagatattagtattctcagccttgtgctcctcctacgaccgggatgagtcatctggtcaccttgatccgtacggaactatagatagtcacttcgaggcatgcgcgtttggacacctactgcttctaagtcatagcgctcgcgtaatgcagcctcgcatgttctttacacgacacgagggattttgattgttataggtgaacgtacagacaaaatcactgtttcagaatacttggcttgtacgatctccagtactccccgtgccggctcggcgaacgggataagacatccacggcattctgtagtggttgaccgggtttgacagactcccttatctggatggggcccgataacgatgagcatagaaccgttgtaagtctcgattgtcacccgaggacaaaattttctcatcctaggttcactcagcgtcgttagagcatcagagttccgtctttaggttactttaaagatcgaaagaaaccttcgtgctggtaggaagtctcatataagtagccagcgtggaccgaggaatagattgttatgttgcatgtacttctgggatcgtgtccagccaacttcaccgcggcacacgtcgatggacccaggaaatgcctcggggtcagaatgagccttgtgcggcccgactgctgaacgacgtataaaatagagcgtcgtggaccccatacaacgcacataac >43|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 473 bp ggttgtccaggcgcgagcaagtagctgactcgctaatcttaacgagtattgcttaggacttccaaatactccaagacgtcaatacgctttatctttgtgaagtcatcccggaccgagcgcttgggtcgtgatttaaaatcccctgtgatgtggctacaggtgcggcctatacagccgagaagaaggccgtctttaggcgtccaatgaaccgttacagggacacaccaaactgcgccaactgatcccacgggtcacggtacgctctaagaccagtcgggattctgacttaacatcgcagcatctgatcgagtggcttctccagcgagcctagggcattacaccgtgcgttcgcaaactctgtatgcttgtcggaaggtatagcttgagcccctttggttcatatcgttaatacttaagtaagtggggtcaatctttttgaactaatcaaatgactcactggcgaaaggcagacg >44|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 272 bp gtagaacttgttccccatggacaatgctagttccgttaatgccaggtattcatgtgccaagcgcctgcctggggaatacgagcctctctacaaacttacggccaccatgcttaaagattcggtgacttcactaatgacctatacaagtaatgcggaggacgctgtcgcttattgctctttgctaaggccagttatgtccgtcagtcaacgatacgctgcggcggtgggtgacggcactagaccggaagcctgatgacaagttcgaatcaata >45|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 860 bp aaaagcatcactctaacgacgctaccgtctgaatagatcaagattgctatcggttcgaccttgatcgcatgtgaacccgcccaaaaacccgtctcgacaaaagttacgtcgcatgggctgcgccaccggatagctcctagcttatcttataaatcaggtagagctacaacatggtgctatgacaactggagtgtcatcgctttggcgaaaccgtaaagggtgggaattgctgcattctcaactgggccgaactattccgcattcggctgctcacaaatcgtggaatgtgtccttgaacgtcctgcttaaccatggtcattgccacgaaggccctggtcggttcagaagtgtatcagacttacaagggtccgaggaggttccggcggggggagaacaagcatcgaacacgactcactgacctgtaggggtattacctatcactgtgacccacatctgaggtactggtccattccataaagatcgagtcgtcttcctaaactgggcactcatacgtacaggaccaaaaaagaggttggttggtgaccgtgccacgcctggcggtcagaatgagttaatgggtcgaataggcgatcttgataacaatagagattcacaacgtgggctcaagccgcttcctgcgaccactattagcgaactagggctatcgccacggaaaagtacttctaacatatacccctatggagtttcagtgtgaagccactgagcagtggcgaggttgtcgcgtcttttgttcaattgttccctgtactttaatgttcgtatcggatttgctttccgttgatagcgaatctctaccctgtcgctgtgtctagcatccgtccgcgaagccggtgacacat >46|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 884 bp aagcctctacaggctctgcggtttggctttacttaacggtgagtcaggaaaacattactgctacgttcaccgtgttcagagatagagagtacattagggaccaatcacaacgttcgccagggcaccgcctaatccgcgttgttagcaagagtacaggctctcgtatactttcagacccttcaatactagacgacaaattgcagcccggggtcatcggtcgactcagatacgtgctaacgagtaccaggtctaccgttgcaacgttggatgcgttatactcggcataaggcgatgccctttgacatactgcactacgctttggcgatagcgctacagttgatgaccgggctaactgcacgcgcgcgtagacgggagcccaattttttgaaattggcgacgaccgacttacgcacctatgcctcgcacgaccagtagctgtaagctctggtcgttggcattagattggcgaaccgctgaccacgatttaagccttccaaatcgttctactatacgagctcagcgtggcggtatgctagttgataaagtaaggacgttctcccgggtccagaagcccgatccacttttaaaagggctgatgcataaactaagactgtatgtctggttcaagtcttcagtaacttcagcctatgtttccaagtgacaacttggagaggggatgctgcggattctgatctaagctgttatatatctgttatcacctcgaaactatcactctggatgaccctcctatgtgcgctcagcgtagaccttccgctgtactttcataaacatagcatggagtgactcagaagagctttaaaggcgggagatcataggtcgcggtcgagcgaatgggaccgaggggaatgccaccaattcccgct >47|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 888 bp gcgccttgaagaggcgaggtctaaaggcaaaaatttagatccgccctatgagacggccgacgcggagaattccctaaccactattgtcctctgcatcgatatcaggaataggcttacctgcaatctcttatggtgatagactgtttgggagctgaacctgagacgcgcacgaaatttggaaggatcaaataggccccgcagtctctggtagacttctgccgagcggactagcttggctaaggtgtacaagcctaaatcgtttttcacatcaattttatagctgattatagaggaacgacgcgatcgtgcagagtgatggtcaaagggtcggtacgtcggatgcccagaaatatggtctgaggggtagcctgttcagaggcgcttaacttgctccttgctcacaggagcgatatgcgctagggttctggacgatcgaccatcattgtaacccatccaatccgtccttattgatggcccactcccgcatgctggtccgaaggcgatcccgatatcccgagcactagaattcgacacagtctgtaccgtgcctaattcttatcagaccctttatgcccgtctcggccttagagttaaatggactatctccacggaatgggcagtgcatcgctaccaagggtcgcccgatcccggggtttccacttcggatcatttttgtggatagtacatatcccacttcaacaagatagcaaaagtccaagacgcagtagagcacgtttctcaaacaacggaccatgcacggttcccggtaacccagtccagggaacaatttgtggttcttctttgaactagttgggagcaacagactcaggggctggccagtagtattgaaccaagcgttttttacaattactttgctggcgctaactg >48|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 588 bp catcaagatgggttacgtaggaccgagattcagtctctgggttagagccgacagcggggccgctacatagtacacggcgaggaatgcggggttgggctgaaccgtacacagtgggctagctgcggtacctgccaccggcatgcgtttaaatcctttcctttggcgaagccaactgccgacgtccgcaacagagactcgttttccgaccccgttactaaatcagctaactggcgcctgaatcctcttacgtcggatgttaattagtgtatagaatatcggagggttgagtgcgacgcgcttcctgttctccgctacttcttgtattatgatttggtcaaatacagtcgacaatagtgctcgacaggatataacgctatggcaccccatagtatcagctaaacgttcatatcctagtagcttagaaagaaatttaatatgcagtggagcctggaaccttacttattgtcggctcatcccgcaatactcgcaaataatcacgcatggtgccggacggtacgggccacgtcaattaggtaaacgaaacacgttacgtggactgctcacccaaccttgagggactgtctactt >49|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 626 bp taacctcagtctcgttcccccctcggtagttcggacccttattcgcttatctcacattcatcactgtagaccaaggaccgggcatacttgcggatatctaccaggactaggcacttagggatacgctgttgaatacgggtttcgtcccgtgtactcaagtgtagtttaagataggtacgagtgctagtacatcgtacaatttacaactgacttaaacgagagtttattatgtcttgttcacttgttgacacgcctgggaaaataataaaaggcaacgtctaatctcagacccgttgattaactaagcagcgtgacgtggagtcatacctgctatatttgggaggtgggaagtattggtgaaccgagcccctcctagccgtggcggtaatgacattaagaaggcgcagttagtcagcactcgaggcaggtgcgcctctgcacgtctgctgatatcgtcggaacgagttaacgctcccgcccacccatcagtcagggaccactttcgacacctagttcgagatttctcctttcggtaaaatcgggctcaattaaggcttgggtaccccggccagggaatatgcacatgagggaactatgagactgcacctaccccgcacggatggagg >50|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 214 bp taactgtcggtcactgctcatcccgactagttcggctcactagacttactcgcggaagcgagaagtaggacgtcgtgtaatactccaacgtcgttacgcaatgttgtaaaacttcatcgcattccgtgcatggcctaaacgtgcagcattatataacgctctttggtcttaatatccatcgcgggagtaacgcgaaggggagacgtgtgcctga
\ No newline at end of file
def read_in_fasta(file_path):
'''
This function reads in FASTA files
argument is file_path
it returns a dictionary with the sequences
'''
sequences = {}
f = open(file_path)
for line in f:
if line[0] == '>':
defline = line.strip()
defline = defline.replace('>', '')
else:
if defline not in sequences:
sequences[defline] = ''
sequences[defline] += line.strip()
f.close()
return sequences
def read_sequence(seq, read_length, padding_probabilities=None):
'''
This function reads sequences
arguments: seq is a list of sequences
padding_probabilities is a number??
returns sequenced element
'''
from random import choice
bases = ["A", "T", "C", "G"]
sequenced = ''
if read_length >= len(seq):
for nt in range(len(seq)):
sequenced += seq[nt]
for nt in range(len(seq),read_length):
sequenced += choice(bases)
else:
for nt in range(read_length):
sequenced += seq[nt]
return sequenced
def simulate_sequencing(sequences, read_length):
results = {}
for index, key in enumerate(sequences):
results[key] = read_sequence(sequences[key],read_length=read_length)
return results
def write_fasta(sequences, file_path):
"""
Takes a dictionary and writes it to a fasta file
Must specify the filename when caling the function
"""
from textwrap import wrap
with open(file_path, "w") as outfile:
for key, value in sequences.items():
outfile.write(key + "\n")
outfile.write("\n".join(wrap(value, 60)))
outfile.write("\n")
def run_read_sequencer(input_file_path, read_length, output_file_path):
sequences = read_in_fasta(input_file_path)
reads = simulate_sequencing(sequences, read_length)
write_fasta(reads, output_file_path)
'''
This function reads in FASTA files
argument is file_path
it returns a dictionary with the sequences
'''
import sys
def read_in_fasta(file_path):
sequences = {}
f = open(file_path)
for line in f:
if line[0] == '>':
defline = line.strip()
defline = defline.replace('>', '')
else:
if defline not in sequences:
sequences[defline] = ''
sequences[defline] += line.strip()
return sequences
'''
This function reads sequences
arguments: seq is a list of sequences
padding_probabilities is a number??
returns sequenced element
'''
import random
def read_sequence(seq, padding_probabilities, read_length):
reading_element = random.choice(seq)
bases =["A", "T", "C", "G"]
if read_length > len(reading_element):
for nt in [0:len(reading_element)]:
sequenced += reading_element[nt]
for nt2 in [len(reading_element):read_length]:
sequenced += random.choice(bases)
else:
for nt in [0:read_length]
sequenced += reading_element[nt]
return sequenced
\ No newline at end of file
from setuptools import setup
from setuptools import setup
setup(
name='awesome_read_sequencer',
version='0.1.0',
author='An Awesome Coder',
author_email='aac@example.com',
packages=['random', 'sys'],
scripts=['read_in_FASTA.py','read_sequence.py']
license='LICENSE.txt',
description='An awesome package that simulates sequencing of a FASTA file.',
long_description=open('README.md').read(),
install_requires=[
"random",
"sys"
],
entry_points = {
'console_scripts': ['read_sequencer_package/cli.py:parser'],
setup(
name='awesome_read_sequencer',
version='0.1.0',
author='An Awesome Coder',
author_email='aac@example.com',
packages=['random'],
scripts=['cli.py', 'modules.py'],
license='LICENSE.txt',
description='An awesome package that simulates sequencing from sequences specified by a FASTA file.',
long_description=open('README.md').read(),
install_requires=['random', 'sys'],
entry_points={
'console_scripts': ['read_sequencer=read_sequencer_package/cli.py:main']
}
)
\ No newline at end of file
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment