import argparse
import logging

from read_sequencer import ReadSequencer

# Module-level logger: main() is also invoked through the console-script entry
# point, where the ``if __name__ == '__main__'`` block below never runs.
LOG = logging.getLogger(__name__)

# Building the parser has no side effects, so it can stay at module level;
# the actual parsing happens inside main().
parser = argparse.ArgumentParser(prog='read_sequencer',
                                 description='Simulates sequencing of DNA sequences specified by an FASTA file.')

parser.add_argument('output',
                    help='path to FASTA file')
parser.add_argument('-i','--input', default=None,
                    help='path to FASTA file')
parser.add_argument('-r','--read-length', default=100,
                    help='read length for sequencing',
                    type=int)
parser.add_argument('-n','--n_random', default=100, type=int,
                    help='n random sequences. Just used if input fasta file is not specified.')
parser.add_argument('-s','--chunk-size', default=10000, type=int, help='chunk_size for batch processing')


def main(argv=None):
    """Run the read sequencer command line interface.

    Args:
        argv: list of command line arguments to parse. Defaults to None, in
            which case argparse reads ``sys.argv[1:]``.

    Returns:
        None
    """
    # Parse here rather than at import time, so importing this module (e.g.
    # from a test) neither consumes sys.argv nor exits the interpreter.
    args = parser.parse_args(argv)

    # basicConfig() is a no-op when the root logger already has handlers, so
    # an embedding application keeps its own logging configuration.
    logging.basicConfig(
        format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")',
        level=logging.INFO)

    LOG.info("Read sequencer started.")
    read_sequencer = ReadSequencer(fasta=args.input, output=args.output,
                                   read_length=args.read_length, chunk_size=args.chunk_size)
    if args.input is not None:
        # Input file given: count its records to pick single-file or batch mode.
        read_sequencer.get_n_sequences()
    else:
        # No input file: simulate the requested number of random reads.
        read_sequencer.define_random_sequences(n=args.n_random)

    read_sequencer.run_sequencing()

    LOG.info("Read sequencer finished.")


if __name__ == '__main__':
    main()
class ReadSequencer:
    def __init__(self, fasta: str = None, output: str = None, read_length: int = 150, chunk_size: int = 10000) -> None:
        """ ReadSequencer class
        Args:
            fasta: path fasta file
            output: path output fasta file(s)
            read_length: read length, defaults to 150.
            chunk_size: maximum number of sequences written to a single output file. When more
                        sequences are processed, the output is split into chunk files. Defaults to 10000.

        Returns:
            None
        """
        self.fasta = fasta
        self.output = output
        self.read_length = read_length
        self.chunk_size = chunk_size
        self.random = False
        self.bases = ('A', 'T', 'C', 'G')
        self.n_sequences = None

    def get_n_sequences(self) -> None:
        """
        Helper function to detect number of sequences present in set fasta file.

        Returns:
            None
        """
        # Count lazily; building a list of all records only to take its length
        # would hold the whole file in memory.
        self.n_sequences = sum(1 for _ in SeqIO.parse(self.fasta, 'fasta'))

    def define_random_sequences(self, n: int) -> None:
        """
        Defines random sequences.

        Args:
            n: number of random sequences to be generated

        Returns:
            None
        """
        self.random = True
        self.n_sequences = n

    def generate_random_sequence(self, length: int) -> "Seq":
        """
        Generates random sequence.

        Args:
            length: length of sequence

        Returns:
            random sequence of the requested length
        """
        return Seq(''.join(choices(self.bases, k=length)))

    def resize_sequence(self, record: "SeqRecord") -> "Seq":
        """
        Resizes sequence according to set read length. If sequence is shorter than read length, fills up
        with random nucleotides. The passed record is modified in place.

        Args:
            record: SeqRecord

        Returns:
            the resized sequence (also stored in record.seq)
        """
        n_add = self.read_length - len(record.seq)
        if n_add > 0:
            record.seq = record.seq + self.generate_random_sequence(n_add)
        else:
            record.seq = record.seq[:self.read_length]
        return record.seq

    def batch_iterator(self, iterator: Iterator, batch_size: int) -> Generator:
        """
        This is a generator function, and it returns lists of the
        entries from the supplied iterator. Each list will have
        batch_size entries, although the final list may be shorter.

        Args:
            iterator: iterator object generated with Bio.SeqIO.parse()
            batch_size: batch size to use for the generator

        Returns:
            list of entries from supplied iterator according to batch_size

        Raises:
            ValueError: if batch_size is smaller than 1
        """
        if batch_size < 1:
            raise ValueError('batch_size must be at least 1')
        batch = []
        for entry in iterator:
            batch.append(entry)
            if len(batch) == batch_size:
                yield batch
                batch = []
        # Emit the leftover entries; without this the last, shorter batch
        # would be dropped silently.
        if batch:
            yield batch

    def _chunk_filename(self, number: int) -> str:
        """Returns the name of the output file for the chunk with the given 1-based number."""
        return self.output.replace('.fasta','') + '_chunk_%i.fasta' % number

    def _random_records(self, indices):
        """Yields one random SeqRecord of read length per 0-based index in indices."""
        for index in indices:
            yield SeqRecord(
                self.generate_random_sequence(self.read_length),
                id='random_seq: ' + str(index + 1))

    def _resized_records(self, records):
        """Yields the passed records after resizing each of them to read length."""
        for record in records:
            self.resize_sequence(record)
            yield record

    def run_sequencing(self) -> None:
        """
        Runs read sequencing of specified sequences from input fasta file or generates random sequences for a given
        read length. If number of sequences exceeds chunk-size, it will switch to batch processing mode.

        Returns:
            Writes processed sequences to output fasta file(s).

        Raises:
            ValueError: if neither an input fasta file nor random sequences were defined
        """
        if not self.random and self.fasta is None:
            raise ValueError('no input fasta file set and no random sequences defined')
        if self.n_sequences is None:
            # Only reachable in fasta mode; random mode always sets the count.
            self.get_n_sequences()

        single_file = self.n_sequences <= self.chunk_size

        if self.random:
            if single_file:
                with open(self.output, 'w') as output_handle:
                    SeqIO.write(self._random_records(range(self.n_sequences)), output_handle, 'fasta')
            else:
                batches = self.batch_iterator(range(self.n_sequences), self.chunk_size)
                for i, batch in enumerate(batches):
                    with open(self._chunk_filename(i + 1), 'w') as output_handle:
                        # The batch holds the global indices, which keeps ids
                        # unique across all chunk files.
                        SeqIO.write(self._random_records(batch), output_handle, 'fasta')
            return

        # The with-block closes the input handle in both modes.
        with open(self.fasta) as input_handle:
            records = self._resized_records(SeqIO.parse(input_handle, 'fasta'))
            if single_file:
                with open(self.output, 'w') as output_handle:
                    SeqIO.write(records, output_handle, 'fasta')
            else:
                for i, batch in enumerate(self.batch_iterator(records, self.chunk_size)):
                    with open(self._chunk_filename(i + 1), 'w') as output_handle:
                        SeqIO.write(batch, output_handle, 'fasta')
class ReadSequencer:
    """Simulate fixed-length sequencing reads.

    Reads are either derived from the records of an input FASTA file
    (truncated or padded to the read length) or, after
    define_random_sequences() was called, built from random bases.

    Args:
        fasta: path fasta file
        output: path output fasta file(s)
        read_length: read length, defaults to 150.
        chunk_size: maximum number of sequences written to a single output
            file. When more sequences are processed, the output is split
            into "<output without .fasta>_chunk_<i>.fasta" files.
            Defaults to 10000.
    """

    def __init__(
        self,
        fasta: str = None,
        output: str = None,
        read_length: int = 150,
        chunk_size: int = 10000,
    ) -> None:
        self.fasta = fasta
        self.output = output
        self.read_length = read_length
        self.chunk_size = chunk_size
        self.random = False
        self.bases = ("A", "T", "C", "G")
        self.n_sequences = None

    def get_n_sequences(self) -> None:
        """
        Helper function to detect number of sequences present in set fasta file.

        Returns:
            None
        """
        # Count lazily; building a list of all records only to take its
        # length would hold the whole file in memory.
        self.n_sequences = sum(1 for _ in SeqIO.parse(self.fasta, "fasta"))

    def define_random_sequences(self, n_seq: int) -> None:
        """
        Defines random sequences.

        Args:
            n_seq: number of random sequences to be generated

        Returns:
            None
        """
        self.random = True
        self.n_sequences = n_seq

    def generate_random_sequence(self, length: int) -> "Seq":
        """
        Generates random sequence.

        Args:
            length: length of sequence

        Returns:
            random sequence of the requested length
        """
        return Seq("".join(choices(self.bases, k=length)))

    def resize_sequence(self, record: "SeqRecord") -> "Seq":
        """
        Resizes sequence according to set read length. If sequence is
        shorter than read length, fills up with random nucleotides.
        The passed record is modified in place.

        Args:
            record: SeqRecord

        Returns:
            the resized sequence (also stored in record.seq)
        """
        n_add = self.read_length - len(record.seq)
        if n_add > 0:
            record.seq = record.seq + self.generate_random_sequence(n_add)
        else:
            # Keep exactly read_length bases; the end of a slice is
            # exclusive, so no "- 1" is needed here.
            record.seq = record.seq[: self.read_length]
        return record.seq

    def batch_iterator(self, iterator: Iterator, batch_size: int) -> Generator:
        """
        This is a generator function, and it returns lists of the
        entries from the supplied iterator. Each list will have
        batch_size entries, although the final list may be shorter.

        Args:
            iterator: iterator object generated with Bio.SeqIO.parse()
            batch_size: batch size to use for the generator

        Returns:
            list of entries from supplied iterator according to batch_size

        Raises:
            ValueError: if batch_size is smaller than 1
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        batch = []
        for entry in iterator:
            batch.append(entry)
            if len(batch) == batch_size:
                yield batch
                batch = []
        # Emit the leftover entries; without this the last, shorter batch
        # would be dropped silently.
        if batch:
            yield batch

    def _chunk_filename(self, number: int) -> str:
        """Return the output file name for the chunk with this 1-based number."""
        return self.output.replace(".fasta", "") + "_chunk_%i.fasta" % number

    def _random_records(self, indices):
        """Yield one random read-length SeqRecord per 0-based index."""
        for index in indices:
            yield SeqRecord(
                self.generate_random_sequence(self.read_length),
                id="random_seq: " + str(index + 1),
            )

    def _resized_records(self, records):
        """Yield the given records after resizing each one to read length."""
        for record in records:
            self.resize_sequence(record)
            yield record

    def run_sequencing(self) -> None:
        """
        Runs read sequencing of specified sequences from input fasta file or
        generates random sequences for a given read length. If number of
        sequences exceeds chunk-size, it will switch to batch processing mode.

        Returns:
            Writes processed sequences to output fasta file(s).

        Raises:
            ValueError: if neither an input fasta file nor random sequences
                were defined
        """
        if not self.random and self.fasta is None:
            raise ValueError(
                "no input fasta file set and no random sequences defined"
            )
        if self.n_sequences is None:
            # Only reachable in fasta mode; random mode always sets the count.
            self.get_n_sequences()

        single_file = self.n_sequences <= self.chunk_size

        if self.random:
            if single_file:
                with open(self.output, "w") as output_handle:
                    SeqIO.write(
                        self._random_records(range(self.n_sequences)),
                        output_handle,
                        "fasta",
                    )
            else:
                batch_generator = self.batch_iterator(
                    range(self.n_sequences), self.chunk_size
                )
                for i, batch in enumerate(batch_generator):
                    with open(self._chunk_filename(i + 1), "w") as output_handle:
                        # The batch holds the global indices, which keeps
                        # ids unique across all chunk files.
                        SeqIO.write(
                            self._random_records(batch), output_handle, "fasta"
                        )
            return

        # The with-block closes the input handle in both modes.
        with open(self.fasta) as input_handle:
            records = self._resized_records(SeqIO.parse(input_handle, "fasta"))
            if single_file:
                with open(self.output, "w") as output_handle:
                    SeqIO.write(records, output_handle, "fasta")
            else:
                batch_generator = self.batch_iterator(records, self.chunk_size)
                for i, batch in enumerate(batch_generator):
                    with open(self._chunk_filename(i + 1), "w") as output_handle:
                        SeqIO.write(batch, output_handle, "fasta")
+agagcgtacggcgcgcatcgtataccctacgagggcggcgtgtggaggaacgctgggctg +acactgtagaagattagatacacttgtccctaaaattaac +>6|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 703 bp +tgcagtcgatgtgctattcgttttaggcagtctacgcgcttagtaactcccacggccata +gacttatctcagacatggaccatgtcgatatcggacgccg +>7|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 243 bp +actctttagaatgggtttcactaatagtacgtgcatacaatttcgtcagaaagggcgctt +gctaagggacacggatcaatgatgaccagacttatggtgt +>8|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 863 bp +attggcccggtccaggacagagccttatattgctactggtatgagaaccgttctgacgta +aacttgatggctttacgcctgcacgggcttcatacacaca +>9|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 494 bp +aagcgaaactcctagaacttcccatcaggcaatcgtgtcccacgaagcacggatactacg +ggcactagttgaatggggggtttttttcgtaggtcgtaat +>10|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 86 bp +atcctagcgccaaagatttactgttatggggtcgacgaacactagccgataatgccgtcc +tgggatctctagcctagtattatgcgTCTTCGGAGCAGGG +>11|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 360 bp +cgcctgagggtcctaaatctgacgtatgatcgaagagattggaaggtcccggcgggtcac +cccacgttgcgatcatggccaaggccatggtttgctcaaa +>12|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 140 bp +gaattcctggggatttactcacccccgaggcggacaagatttccagctggatcaccgagg +gttacttaatcccttcgatgctttcaaaggccctaatcag +>13|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 832 bp +aatactctcgttgaagcgtcggacagtaaagtgagagatttcggcccacggtagtcggac +attctcagtggggagcgaagagttgcgcttagagccgacg +>14|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 296 bp +atcggggtgcgaaatcccctgagctggttgactacatacgtaaccacgttccgtgcgtca +tctaagcgtatcggctcatactggtggtaactagacttgg +>15|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 515 bp +accttcaatttgttcgcccgggacaagtagaaattactgtaaactaaacttaacctattc +cttgttaaagtccgcaccaagtgtactgtaagaatggtcg +>16|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 820 bp +ccggctcaatcctgtagaaccgcgtacaacacacccaagctataccgcacacggcgcctt +agcaaccactgcttatctgcgtattatacctttacaatca 
+>17|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 791 bp +attgttagggcctgtccggaaaagatcaacggaagatattcaccagcacctatgctgact +cacgtagttcccgacgttcagtcccctccaacgtggaagg +>18|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 328 bp +accgattacaggcagtcggccttgtccgctcgtatatccagggatgttccaccgaaagtg +ggagtgtggcacttattggtaaaaggcatttttacgaacg +>19|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp +ggagtggaaaattctgtagtccgttggcggcgaccgcaaaccagaataatatggtcacgt +taggccctcgggccccttcatatgtacggagtcattgaat +>20|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 440 bp +atcttaaacagcccaatcggctcgccgaccaatttcccgcttcacagtacgcggaagaat +ctgcagatagaagtcagccctctcacgtcaataggaatgc +>21|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 840 bp +cataactcgtgagtggccctgtacaagtcattgcatcacaatccttgcaatttgctcctt +tggccaagcgtacaagaccccggacccatacgctcccggc +>22|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 234 bp +caccgcgaaagtgactcagttttcccggtcttatcacggtcgttgtcgtccagattccgg +ttgttaactgcgggagctataacacttattccttactgcg +>23|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 917 bp +atcaagtgattacctggtaacccgccgctcttgcagtgttcaccctttgtgtcgtcttag +tgtttgtacacgttaaggaaaagcgttagcttaaccatta +>24|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 676 bp +cacacggcatcgcaaagcgagctatccagagatgatacatgtggttgaaggtgattgcgt +caacatgggggttgctcagtttggttggtcaatcaacggt +>25|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 870 bp +ctgatcaccaatagcttgcgcttaacacacgcgccttacaattatatgacgcccttgcca +atgacagatagagccattaatcgtggaaaccaggcattta +>26|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 751 bp +gggtgcgttatggggactaaagactgttactaccggtactccgccttatagagccgtcac +gtattaatcagctatcaacagatactatcgtcacagccct +>27|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 574 bp +gtactgcaccttgcactgctatctacaatgccgagggtcgccctagtgctttgcatgttt +ggcctctacctacgagtctacgcgggcgtttttaagcaag +>28|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 169 bp 
+agctccctaaacaacacccgcgtaaaaccttcagttatggtgccgactaaccctgtggat +gtcttagcgctctcgttccgatgggtgctgatactagtaa +>29|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 408 bp +tgcagtgatgcatcgataagaccgcatagttacctccttacaggtgacgctaggctaatt +gggagtgctggcacttgtgccctacagtcaagcgctcacg +>30|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 52 bp +caaagcgattcgggttaacgcacttaagagttcgacgtaggttagtcccctcCTTTTAGG +TGTCTAACTAGAGAGGCGCCCTATTGGGCTCAGGATGACG +>31|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 581 bp +tgctctgacgtgtaagcgccttcgataacgtctttgcagcgccccacaaagtaaggaccg +gtctaacagggcttccgaatcaatagactgatagtaatgg +>32|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp +gcggaactacctctctaagaccgcacaacaagtgtagtagatgaagatcacgcagagtgc +tcggcactgcatttttatacgtcgaatcagaaacgaggtt +>33|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 297 bp +gccctcgcgccagcttacttttagaaaacatcgaccggtaagagatacctgggtgagctg +ggcttcacgacatgttcttaaatcaatactctaaatctgc +>34|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 573 bp +gcctaggggtcttgaccacagggagtacgagcattgatcattggagcaggtggctaatat +tgatagtggttagaccaccggcgcatcatcgtacgagcgc +>35|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 559 bp +gaaaaagtcgccccattcagttacaatcgtcttcagaagccagctcggttggggctatct +gcggggtaatgcaacagggggctaccagacggtaaaccag +>36|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 187 bp +ctaagtccttatctatgatgcatctttcgttactgcgacaatatccgagacgagcagagt +tacacgccgaggtgtaaacgaatacgattgctatatgcaa +>37|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 549 bp +ttcatatggggatttggaatcgggtttgtgcggaatatgcccacgagactgcttatgtca +acgagacgacccattgtcacgttgtaaggccaccaataac +>38|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 916 bp +gtggcctaccataaatcaatttgggttaacgctctttgatctacgcactatgttgattca +cttaccccttgtcaccgggcagaagagagccagtttaggt +>39|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 848 bp +accccggtcgctttggccggtcgtagccctaatcaattctgttcgtatcactaaagtaac +ggtttgaaatcctttgcaaacttgatctgggtatatgaac 
+>40|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 289 bp +agagcaaaagaaagtctgctccgcgtgacacacttgctcgttgtagtaactgcacgcgcc +gtctactcgacagggaccccccgtcggttcctctctatag +>41|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 642 bp +ctggggaatgaccgtaccgatctaattccccgtcgaaaaacttatgacgcgcagttgtcc +ttatgcttgagacatgaatccttgccccatattggcgatc +>42|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 993 bp +gcaagaagccaaaaaccttgcaggaggtcatttaagtttacccgcgcataagcagagacg +gacctctctgagatctcgcaccgcgcgcccggccggcact +>43|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 473 bp +ggttgtccaggcgcgagcaagtagctgactcgctaatcttaacgagtattgcttaggact +tccaaatactccaagacgtcaatacgctttatctttgtga +>44|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 272 bp +gtagaacttgttccccatggacaatgctagttccgttaatgccaggtattcatgtgccaa +gcgcctgcctggggaatacgagcctctctacaaacttacg +>45|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 860 bp +aaaagcatcactctaacgacgctaccgtctgaatagatcaagattgctatcggttcgacc +ttgatcgcatgtgaacccgcccaaaaacccgtctcgacaa +>46|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 884 bp +aagcctctacaggctctgcggtttggctttacttaacggtgagtcaggaaaacattactg +ctacgttcaccgtgttcagagatagagagtacattaggga +>47|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 888 bp +gcgccttgaagaggcgaggtctaaaggcaaaaatttagatccgccctatgagacggccga +cgcggagaattccctaaccactattgtcctctgcatcgat +>48|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 588 bp +catcaagatgggttacgtaggaccgagattcagtctctgggttagagccgacagcggggc +cgctacatagtacacggcgaggaatgcggggttgggctga +>49|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 626 bp +taacctcagtctcgttcccccctcggtagttcggacccttattcgcttatctcacattca +tcactgtagaccaaggaccgggcatacttgcggatatcta +>50|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 214 bp +taactgtcggtcactgctcatcccgactagttcggctcactagacttactcgcggaagcg +agaagtaggacgtcgtgtaatactccaacgtcgttacgca diff --git a/tests/test_read_sequencer.py b/tests/test_read_sequencer.py new file mode 100644 index 
import pytest

from readsequencer.read_sequencer import ReadSequencer


def test_chunksize():
    """A ReadSequencer built without arguments uses a chunk size of 10000."""
    # Create the instance inside the test: a failing constructor is then
    # reported as a failure of this test rather than as an error while the
    # test module is being imported.
    sequencer = ReadSequencer()
    assert sequencer.chunk_size == 10000