diff --git a/readsequencer/cli.py b/readsequencer/cli.py index ee221c9961b25181055757da95d323cdeeaa5689..76afc5a934c4a7f3f9a082fd53cc5a27e4078f0c 100644 --- a/readsequencer/cli.py +++ b/readsequencer/cli.py @@ -4,35 +4,34 @@ from readsequencer.read_sequencer import ReadSequencer LOG = logging.getLogger(__name__) -parser = argparse.ArgumentParser( - prog="read_sequencer", - description="Simulates sequencing of DNA sequences specified by an FASTA file.", -) -parser.add_argument("output", help="path to FASTA file") -parser.add_argument("-i", "--input", default=None, help="path to FASTA file") -parser.add_argument( - "-r", "--read-length", default=100, help="read length for sequencing", type=int -) -parser.add_argument( - "-n", - "--n_random", - default=100, - type=int, - help="n random sequences. Just used if input fasta file is not specified.", -) -parser.add_argument( - "-s", - "--chunk-size", - default=10000, - type=int, - help="chunk_size for batch processing", -) - -args = parser.parse_args() +def main(): + parser = argparse.ArgumentParser( + prog="read_sequencer", + description="Simulates sequencing of DNA sequences specified by an FASTA file.", + ) + parser.add_argument("output", help="path to FASTA file") + parser.add_argument("-i", "--input", default=None, help="path to FASTA file") + parser.add_argument( + "-r", "--read-length", default=100, help="read length for sequencing", type=int + ) + parser.add_argument( + "-n", + "--n_random", + default=100, + type=int, + help="n random sequences. Just used if input fasta file is not specified.", + ) + parser.add_argument( + "-s", + "--chunk-size", + default=10000, + type=int, + help="chunk_size for batch processing", + ) -def main(): + args = parser.parse_args() LOG.info("Read sequencer started.") if args.input is not None: read_sequencer = ReadSequencer( diff --git a/requirements-dev.txt b/requirements-dev.txt index 1d44627feec16e03be395a6bef6249c9f5be1a58..36ace6490a2b11b3c74737db10abcbfc8067af35 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,4 +3,5 @@ coverage flake8 flake8-docstrings mypy -pylint \ No newline at end of file +pylint +cli_test_helpers \ No newline at end of file diff --git a/tests/fasta_testfile/test.fasta b/tests/fasta_testfile/test.fasta deleted file mode 100644 index 6aa3033526931a94577fcf3ac506baaa0a7a0283..0000000000000000000000000000000000000000 --- a/tests/fasta_testfile/test.fasta +++ /dev/null @@ -1,150 +0,0 @@ ->1|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 481 bp -tgagcactcggtgccaagggcggggatacacagatggttggctgatacaaccgggactta -aattccctagactagatctgtgttggaacgcctctctacg ->2|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 495 bp -ctgaatcaggtgtaggttctttttacgtcgtttaaggagctacacggtatcttgttttca -gttaaggtgccacacccccgggtggatcatccgtcagctt ->3|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 193 bp -acttcagtactggaaggatctaggaaccattaatgcgagtgtggtgacgccagacgaccc -ccggtgttctgccaccttctttggataggagaaccgtcac ->4|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 625 bp -acgtctggagcgtgggttgacccctgtacatggttctttccggatccttaacgtgccgat -acaactcaaaggtaactgtgcttaccacttccgaagctac ->5|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 845 bp -agagcgtacggcgcgcatcgtataccctacgagggcggcgtgtggaggaacgctgggctg -acactgtagaagattagatacacttgtccctaaaattaac ->6|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 703 bp -tgcagtcgatgtgctattcgttttaggcagtctacgcgcttagtaactcccacggccata -gacttatctcagacatggaccatgtcgatatcggacgccg ->7|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 243 bp -actctttagaatgggtttcactaatagtacgtgcatacaatttcgtcagaaagggcgctt -gctaagggacacggatcaatgatgaccagacttatggtgt ->8|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 863 bp -attggcccggtccaggacagagccttatattgctactggtatgagaaccgttctgacgta -aacttgatggctttacgcctgcacgggcttcatacacaca ->9|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 494 bp -aagcgaaactcctagaacttcccatcaggcaatcgtgtcccacgaagcacggatactacg -ggcactagttgaatggggggtttttttcgtaggtcgtaat ->10|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 86 bp -atcctagcgccaaagatttactgttatggggtcgacgaacactagccgataatgccgtcc -tgggatctctagcctagtattatgcgTCTTCGGAGCAGGG ->11|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 360 bp -cgcctgagggtcctaaatctgacgtatgatcgaagagattggaaggtcccggcgggtcac -cccacgttgcgatcatggccaaggccatggtttgctcaaa ->12|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 140 bp -gaattcctggggatttactcacccccgaggcggacaagatttccagctggatcaccgagg -gttacttaatcccttcgatgctttcaaaggccctaatcag ->13|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 832 bp -aatactctcgttgaagcgtcggacagtaaagtgagagatttcggcccacggtagtcggac -attctcagtggggagcgaagagttgcgcttagagccgacg ->14|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 296 bp -atcggggtgcgaaatcccctgagctggttgactacatacgtaaccacgttccgtgcgtca -tctaagcgtatcggctcatactggtggtaactagacttgg ->15|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 515 bp -accttcaatttgttcgcccgggacaagtagaaattactgtaaactaaacttaacctattc -cttgttaaagtccgcaccaagtgtactgtaagaatggtcg ->16|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 820 bp -ccggctcaatcctgtagaaccgcgtacaacacacccaagctataccgcacacggcgcctt -agcaaccactgcttatctgcgtattatacctttacaatca ->17|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 791 bp -attgttagggcctgtccggaaaagatcaacggaagatattcaccagcacctatgctgact -cacgtagttcccgacgttcagtcccctccaacgtggaagg ->18|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 328 bp -accgattacaggcagtcggccttgtccgctcgtatatccagggatgttccaccgaaagtg -ggagtgtggcacttattggtaaaaggcatttttacgaacg ->19|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp -ggagtggaaaattctgtagtccgttggcggcgaccgcaaaccagaataatatggtcacgt -taggccctcgggccccttcatatgtacggagtcattgaat ->20|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 440 bp -atcttaaacagcccaatcggctcgccgaccaatttcccgcttcacagtacgcggaagaat -ctgcagatagaagtcagccctctcacgtcaataggaatgc ->21|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 840 bp -cataactcgtgagtggccctgtacaagtcattgcatcacaatccttgcaatttgctcctt -tggccaagcgtacaagaccccggacccatacgctcccggc ->22|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 234 bp -caccgcgaaagtgactcagttttcccggtcttatcacggtcgttgtcgtccagattccgg -ttgttaactgcgggagctataacacttattccttactgcg ->23|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 917 bp -atcaagtgattacctggtaacccgccgctcttgcagtgttcaccctttgtgtcgtcttag -tgtttgtacacgttaaggaaaagcgttagcttaaccatta ->24|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 676 bp -cacacggcatcgcaaagcgagctatccagagatgatacatgtggttgaaggtgattgcgt -caacatgggggttgctcagtttggttggtcaatcaacggt ->25|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 870 bp -ctgatcaccaatagcttgcgcttaacacacgcgccttacaattatatgacgcccttgcca -atgacagatagagccattaatcgtggaaaccaggcattta ->26|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 751 bp -gggtgcgttatggggactaaagactgttactaccggtactccgccttatagagccgtcac -gtattaatcagctatcaacagatactatcgtcacagccct ->27|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 574 bp -gtactgcaccttgcactgctatctacaatgccgagggtcgccctagtgctttgcatgttt -ggcctctacctacgagtctacgcgggcgtttttaagcaag ->28|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 169 bp -agctccctaaacaacacccgcgtaaaaccttcagttatggtgccgactaaccctgtggat -gtcttagcgctctcgttccgatgggtgctgatactagtaa ->29|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 408 bp -tgcagtgatgcatcgataagaccgcatagttacctccttacaggtgacgctaggctaatt -gggagtgctggcacttgtgccctacagtcaagcgctcacg ->30|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 52 bp -caaagcgattcgggttaacgcacttaagagttcgacgtaggttagtcccctcCTTTTAGG -TGTCTAACTAGAGAGGCGCCCTATTGGGCTCAGGATGACG ->31|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 581 bp -tgctctgacgtgtaagcgccttcgataacgtctttgcagcgccccacaaagtaaggaccg -gtctaacagggcttccgaatcaatagactgatagtaatgg ->32|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp -gcggaactacctctctaagaccgcacaacaagtgtagtagatgaagatcacgcagagtgc -tcggcactgcatttttatacgtcgaatcagaaacgaggtt ->33|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 297 bp -gccctcgcgccagcttacttttagaaaacatcgaccggtaagagatacctgggtgagctg -ggcttcacgacatgttcttaaatcaatactctaaatctgc ->34|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 573 bp -gcctaggggtcttgaccacagggagtacgagcattgatcattggagcaggtggctaatat -tgatagtggttagaccaccggcgcatcatcgtacgagcgc ->35|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 559 bp -gaaaaagtcgccccattcagttacaatcgtcttcagaagccagctcggttggggctatct -gcggggtaatgcaacagggggctaccagacggtaaaccag ->36|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 187 bp -ctaagtccttatctatgatgcatctttcgttactgcgacaatatccgagacgagcagagt -tacacgccgaggtgtaaacgaatacgattgctatatgcaa ->37|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 549 bp -ttcatatggggatttggaatcgggtttgtgcggaatatgcccacgagactgcttatgtca -acgagacgacccattgtcacgttgtaaggccaccaataac ->38|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 916 bp -gtggcctaccataaatcaatttgggttaacgctctttgatctacgcactatgttgattca -cttaccccttgtcaccgggcagaagagagccagtttaggt ->39|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 848 bp -accccggtcgctttggccggtcgtagccctaatcaattctgttcgtatcactaaagtaac -ggtttgaaatcctttgcaaacttgatctgggtatatgaac ->40|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 289 bp -agagcaaaagaaagtctgctccgcgtgacacacttgctcgttgtagtaactgcacgcgcc -gtctactcgacagggaccccccgtcggttcctctctatag ->41|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 642 bp -ctggggaatgaccgtaccgatctaattccccgtcgaaaaacttatgacgcgcagttgtcc -ttatgcttgagacatgaatccttgccccatattggcgatc ->42|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 993 bp -gcaagaagccaaaaaccttgcaggaggtcatttaagtttacccgcgcataagcagagacg -gacctctctgagatctcgcaccgcgcgcccggccggcact ->43|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 473 bp -ggttgtccaggcgcgagcaagtagctgactcgctaatcttaacgagtattgcttaggact -tccaaatactccaagacgtcaatacgctttatctttgtga ->44|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 272 bp -gtagaacttgttccccatggacaatgctagttccgttaatgccaggtattcatgtgccaa -gcgcctgcctggggaatacgagcctctctacaaacttacg ->45|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 860 bp -aaaagcatcactctaacgacgctaccgtctgaatagatcaagattgctatcggttcgacc -ttgatcgcatgtgaacccgcccaaaaacccgtctcgacaa ->46|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 884 bp -aagcctctacaggctctgcggtttggctttacttaacggtgagtcaggaaaacattactg -ctacgttcaccgtgttcagagatagagagtacattaggga ->47|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 888 bp -gcgccttgaagaggcgaggtctaaaggcaaaaatttagatccgccctatgagacggccga -cgcggagaattccctaaccactattgtcctctgcatcgat ->48|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 588 bp -catcaagatgggttacgtaggaccgagattcagtctctgggttagagccgacagcggggc -cgctacatagtacacggcgaggaatgcggggttgggctga ->49|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 626 bp -taacctcagtctcgttcccccctcggtagttcggacccttattcgcttatctcacattca -tcactgtagaccaaggaccgggcatacttgcggatatcta ->50|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 214 bp -taactgtcggtcactgctcatcccgactagttcggctcactagacttactcgcggaagcg -agaagtaggacgtcgtgtaatactccaacgtcgttacgca diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000000000000000000000000000000000000..6cb9ae1d6146a9045255f46c32e764b2ce18c516 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,22 @@ +import readsequencer.cli +import pytest +from cli_test_helpers import ArgvContext, shell +import os +import glob +def test_entrypoint(): + """ + Is entrypoint script installed? (setup.py) + """ + result = shell('readsequencer --help') + assert result.exit_code == 0 + +def test_usage_no_args(): + """ + Does CLI abort w/o arguments, displaying usage instructions? + """ + with ArgvContext('readsequencer'), pytest.raises(SystemExit): + readsequencer.cli.main() + + result = shell('readsequencer') + + assert 'usage:' in result.stderr diff --git a/tests/test_read_sequencer.py b/tests/test_read_sequencer.py index a50af538b00e86ce12b004376a45b2ba1c496e3b..7157ccd670226bfe55479379fc8fea87b90ff861 100644 --- a/tests/test_read_sequencer.py +++ b/tests/test_read_sequencer.py @@ -1,29 +1,83 @@ import pytest - +import os +import glob from readsequencer.read_sequencer import ReadSequencer - -sequencer = ReadSequencer() - - -def test_chunksize(): +def test_init_default(): + sequencer = ReadSequencer() + assert sequencer.fasta is None + assert sequencer.read_length == 150 + assert sequencer.output is None assert sequencer.chunk_size == 10000 + assert sequencer.bases == ("A", "T", "C", "G") -def test_run_Input(): - assert sequencer.fasta == None +def test_run_random(): + sequencer = ReadSequencer( + output="./tests/fasta_testfile/results.fasta") + sequencer.define_random_sequences(n_seq=100) + assert sequencer.output == "./tests/fasta_testfile/results.fasta" assert sequencer.read_length == 150 - assert sequencer.output == None assert sequencer.chunk_size == 10000 + assert sequencer.fasta is None + sequencer.run_sequencing() + os.remove("./tests/fasta_testfile/results.fasta") + +def test_run_random_chunks(): + # setup class + sequencer = ReadSequencer( + output="./tests/fasta_testfile/results.fasta", + read_length=150, + chunk_size=10) + sequencer.define_random_sequences(n_seq=50) + # run sequencing + sequencer.run_sequencing() + # check results + assert sequencer.output == "./tests/fasta_testfile/results.fasta" + assert sequencer.read_length == 150 + assert sequencer.n_sequences == 50 + # clean up + result_files = glob.glob("./tests/fasta_testfile/results*") + assert len(result_files) == 5 + for file in result_files: + os.remove(file) + +def test_run_sequencing(): + sequencer = ReadSequencer( + fasta="./tests/fasta_testfile/50_seqs_50_1000_bp.fasta", + output="./tests/fasta_testfile/results.fasta", + read_length=50, + chunk_size=10000) + sequencer.get_n_sequences() + sequencer.run_sequencing() + assert sequencer.output == "./tests/fasta_testfile/results.fasta" + assert sequencer.read_length == 50 + assert sequencer.n_sequences == 50 + result_file = glob.glob("./tests/fasta_testfile/results*") + assert len(result_file) == 1 + for file in result_file: + os.remove(file) -def test_run_Random(): - assert ReadSequencer( - output="./tests/fasta_testfile/results.fasta" - ).output == "./tests/fasta_testfile/results.fasta" - assert ReadSequencer(read_length=1000).read_length == 1000 - assert ReadSequencer(chunk_size=10000).chunk_size == 10000 - assert ReadSequencer( +def test_run_sequencing_chunks(): + # setup class + sequencer = ReadSequencer( + fasta="./tests/fasta_testfile/50_seqs_50_1000_bp.fasta", output="./tests/fasta_testfile/results.fasta", - read_length=1000, - chunk_size=10000).fasta == None + read_length=150, + chunk_size=10) + # run sequencing + sequencer.get_n_sequences() + sequencer.run_sequencing() + # check results + assert sequencer.output == "./tests/fasta_testfile/results.fasta" + assert sequencer.read_length == 150 + assert sequencer.n_sequences == 50 + # clean up + result_files = glob.glob("./tests/fasta_testfile/results*") + assert len(result_files) == 5 + for file in result_files: + os.remove(file) + + +