Skip to content
Snippets Groups Projects
Commit 93c53698 authored by Christoph Harmel's avatar Christoph Harmel
Browse files

feat: updated folder structure, initiated tests, black reformated

parent 28cc31f1
Branches
No related tags found
1 merge request!27feat: updated folder structure, initiated tests, black reformated
import argparse
from read_sequencer import ReadSequencer
import logging
# Module-level logger so main() also works when it is invoked through the
# console-script entry point, where the ``__main__`` guard never runs.
LOG = logging.getLogger(__name__)

# The parser is built at import time, but the command line itself is only
# parsed inside main(); importing this module therefore no longer reads
# sys.argv or exits on unknown arguments.
parser = argparse.ArgumentParser(
    prog='read_sequencer',
    description='Simulates sequencing of DNA sequences specified by an FASTA file.')
parser.add_argument('output',
                    help='path to FASTA file')
parser.add_argument('-i', '--input', default=None,
                    help='path to FASTA file')
parser.add_argument('-r', '--read-length', default=100,
                    help='read length for sequencing',
                    type=int)
parser.add_argument('-n', '--n_random', default=100, type=int,
                    help='n random sequences. Just used if input fasta file is not specified.')
parser.add_argument('-s', '--chunk-size', default=10000, type=int,
                    help='chunk_size for batch processing')


def main(argv=None):
    """
    Command-line entry point of the read sequencer.

    Parses the command line, builds a ReadSequencer from the parsed options,
    selects either FASTA input or random sequences and runs the sequencing.

    Args:
        argv: list of argument strings to parse. Defaults to None, in which
            case argparse reads the arguments from sys.argv.

    Returns:
        None
    """
    args = parser.parse_args(argv)
    LOG.info("Read sequencer started.")
    # Both modes construct the sequencer with the same options; only the way
    # the number of sequences is determined differs.
    read_sequencer = ReadSequencer(
        fasta=args.input,
        output=args.output,
        read_length=args.read_length,
        chunk_size=args.chunk_size,
    )
    if args.input is not None:
        read_sequencer.get_n_sequences()
    else:
        read_sequencer.define_random_sequences(n=args.n_random)
    read_sequencer.run_sequencing()
    LOG.info("Read sequencer finished.")
if __name__ == '__main__':
    # Script execution only: set up log output, bind the module logger that
    # main() writes to, then start the command-line interface.
    log_format = '[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")'
    logging.basicConfig(level=logging.INFO, format=log_format)
    LOG = logging.getLogger(__name__)
    main()
File moved
import argparse
import logging
from readsequencer.read_sequencer import ReadSequencer
LOG = logging.getLogger(__name__)
# The parser is built at import time, but the command line itself is only
# parsed inside main(); importing this module (e.g. from a test run) therefore
# no longer reads sys.argv or exits on unknown arguments.
parser = argparse.ArgumentParser(
    prog="read_sequencer",
    description="Simulates sequencing of DNA sequences specified by an FASTA file.",
)
parser.add_argument("output", help="path to FASTA file")
parser.add_argument("-i", "--input", default=None, help="path to FASTA file")
parser.add_argument(
    "-r", "--read-length", default=100, help="read length for sequencing", type=int
)
parser.add_argument(
    "-n",
    "--n_random",
    default=100,
    type=int,
    help="n random sequences. Just used if input fasta file is not specified.",
)
parser.add_argument(
    "-s",
    "--chunk-size",
    default=10000,
    type=int,
    help="chunk_size for batch processing",
)


def main(argv=None):
    """Command-line entry point of the read sequencer.

    Parses the command line, builds a ReadSequencer from the parsed options,
    selects either FASTA input or random sequences and runs the sequencing.

    Args:
        argv: list of argument strings to parse. Defaults to None, in which
            case argparse reads the arguments from sys.argv.

    Returns:
        None
    """
    args = parser.parse_args(argv)
    LOG.info("Read sequencer started.")
    # Both modes construct the sequencer with the same options; only the way
    # the number of sequences is determined differs.
    read_sequencer = ReadSequencer(
        fasta=args.input,
        output=args.output,
        read_length=args.read_length,
        chunk_size=args.chunk_size,
    )
    if args.input is not None:
        read_sequencer.get_n_sequences()
    else:
        read_sequencer.define_random_sequences(n_seq=args.n_random)
    read_sequencer.run_sequencing()
    LOG.info("Read sequencer finished.")
if __name__ == "__main__":
    # Script execution only: set up log output, (re)bind the module logger
    # that main() writes to, then start the command-line interface.
    log_format = '[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")'
    logging.basicConfig(level=logging.INFO, format=log_format)
    LOG = logging.getLogger(__name__)
    main()
import logging
from random import choices from random import choices
from collections.abc import Generator, Iterator
from Bio import SeqIO from Bio import SeqIO
from Bio.Seq import Seq from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord from Bio.SeqRecord import SeqRecord
from collections.abc import Generator, Iterator
LOG = logging.getLogger(__name__)
class ReadSequencer: class ReadSequencer:
def __init__(self, fasta: str = None, output: str = None, read_length: int = 150, chunk_size: int = 10000) -> None: """ReadSequencer class
""" ReadSequencer class Args:
Args: fasta: path fasta file
fasta: path fasta file output: path output fasta file(s)
output: path output fasta file(s) read_length: read length, defaults to 150.
read_length: read length, defaults to 150. chunk_size: batch size used for memory efficient processing,
chunk_size: batch size used for memory efficient processing, only used when number of sequences greater only used when number of sequences greater
than number of passed sequences. Defaults to 10000. than number of passed sequences. Defaults to 10000.
Returns:
None
"""
def __init__(
self,
fasta: str = None,
output: str = None,
read_length: int = 150,
chunk_size: int = 10000,
) -> None:
Returns:
None
"""
self.fasta = fasta self.fasta = fasta
self.output = output self.output = output
self.read_length = read_length self.read_length = read_length
self.chunk_size = chunk_size self.chunk_size = chunk_size
self.random = False self.random = False
self.bases = ('A', 'T', 'C', 'G') self.bases = ("A", "T", "C", "G")
self.n_sequences = None self.n_sequences = None
def get_n_sequences(self) -> None:
    """
    Helper function to detect the number of sequences present in the set fasta file.

    The records are counted while streaming through the parser, so the whole
    file is never held in memory just to obtain its length.

    Returns:
        None. The count is stored in self.n_sequences.
    """
    self.n_sequences = sum(1 for _ in SeqIO.parse(self.fasta, "fasta"))
def define_random_sequences(self, n_seq: int) -> None:
    """
    Switches the sequencer to random-sequence mode.

    Args:
        n_seq: number of random sequences to be generated

    Returns:
        None. Sets self.n_sequences to n_seq and self.random to True.
    """
    self.n_sequences = n_seq
    self.random = True
def generate_random_sequence(self, length: int) -> Seq:
    """
    Builds a random sequence from the bases in self.bases.

    Args:
        length: number of bases to draw (with replacement)

    Returns:
        Seq holding the drawn bases joined into one string
    """
    nucleotides = choices(self.bases, k=length)
    return Seq("".join(nucleotides))
def resize_sequence(self, record:SeqRecord) -> SeqRecord: def resize_sequence(self, record: SeqRecord) -> SeqRecord:
""" """
Resizes sequence according to set read length. If sequence is shorter than read length, fills up Resizes sequence according to set read length. If sequence is
with random nucleotides. shorter than read length, fills up with random nucleotides.
Args: Args:
record: SeqRecord record: SeqRecord
Returns: Returns:
resized SeqRecord resized SeqRecord
""" """
if (len(record)) >= self.read_length: if (len(record)) >= self.read_length:
record.seq = record.seq[0:self.read_length] record.seq = record.seq[0:self.read_length-1]
else: else:
n_add = self.read_length - len(record) n_add = self.read_length - len(record)
add_seq = self.generate_random_sequence(n_add) add_seq = self.generate_random_sequence(n_add)
...@@ -85,16 +92,16 @@ class ReadSequencer: ...@@ -85,16 +92,16 @@ class ReadSequencer:
def batch_iterator(self, iterator: Iterator, batch_size: int) -> Generator: def batch_iterator(self, iterator: Iterator, batch_size: int) -> Generator:
""" """
This is a generator function, and it returns lists of the This is a generator function, and it returns lists of the
entries from the supplied iterator. Each list will have entries from the supplied iterator. Each list will have
batch_size entries, although the final list may be shorter. batch_size entries, although the final list may be shorter.
Args: Args:
iterator: iterator object generated with Bio.SeqIO.parse() iterator: iterator object generated with Bio.SeqIO.parse()
batch_size: batch size to use for the generator batch_size: batch size to use for the generator
Returns: Returns:
list of entries from supplied iterator according to batch_size list of entries from supplied iterator according to batch_size
""" """
batch = [] batch = []
for entry in iterator: for entry in iterator:
...@@ -105,42 +112,53 @@ class ReadSequencer: ...@@ -105,42 +112,53 @@ class ReadSequencer:
def run_sequencing(self) -> None:
    """
    Runs read sequencing of specified sequences from input fasta file or
    generates random sequences for a given read length. If number of
    sequences exceeds chunk-size, it will switch to batch processing mode
    and write one "<output>_chunk_<i>.fasta" file per batch.

    In batch mode the ids of random sequences keep counting across chunk
    files, so every generated record has a unique id.

    Returns:
        Writes processed sequences to output fasta file(s).
    """

    def chunk_path(index: int) -> str:
        # Only a trailing ".fasta" is stripped; str.replace would also remove
        # the text from directory names or the middle of the file name.
        suffix = ".fasta"
        if self.output.endswith(suffix):
            stem = self.output[: -len(suffix)]
        else:
            stem = self.output
        return stem + "_chunk_%i.fasta" % index

    single_file = self.n_sequences <= self.chunk_size

    if self.random:
        if single_file:
            with open(self.output, "w") as output_handle:
                for i in range(self.n_sequences):
                    record = SeqRecord(
                        self.generate_random_sequence(self.read_length),
                        id="random_seq: " + str(i + 1),
                    )
                    SeqIO.write(record, output_handle, "fasta")
        else:
            batch_generator = self.batch_iterator(
                range(self.n_sequences), self.chunk_size
            )
            for i, batch in enumerate(batch_generator):
                with open(chunk_path(i + 1), "w") as output_handle:
                    # k is the position in the overall range, not in the
                    # batch, which keeps ids unique across chunk files.
                    for k in batch:
                        record = SeqRecord(
                            self.generate_random_sequence(self.read_length),
                            id="random_seq: " + str(k + 1),
                        )
                        SeqIO.write(record, output_handle, "fasta")
    else:
        if single_file:
            with open(self.fasta) as input_handle, open(
                self.output, "w"
            ) as output_handle:
                for record in SeqIO.parse(input_handle, "fasta"):
                    record.seq = self.resize_sequence(record)
                    SeqIO.write(record, output_handle, "fasta")
        else:
            # Keep the input open only while batches are consumed so the
            # handle is closed even if writing a chunk fails.
            with open(self.fasta) as input_handle:
                record_iter = SeqIO.parse(input_handle, "fasta")
                for i, batch in enumerate(
                    self.batch_iterator(record_iter, self.chunk_size)
                ):
                    for record in batch:
                        record.seq = self.resize_sequence(record)
                    with open(chunk_path(i + 1), "w") as handle:
                        SeqIO.write(batch, handle, "fasta")
from setuptools import setup, find_packages from setuptools import setup, find_packages
setup( setup(
name='read_sequencer', name='readsequencer',
version='0.1.1', version='0.1.1',
url='https://git.scicore.unibas.ch/zavolan_group/tools/read-sequencer', url='https://git.scicore.unibas.ch/zavolan_group/tools/read-sequencer',
license='MIT', license='MIT',
...@@ -9,6 +9,6 @@ setup( ...@@ -9,6 +9,6 @@ setup(
author_email='christoph.harmel@unibas.ch', author_email='christoph.harmel@unibas.ch',
description='Simulates sequencing with a specified read length from sequences specified by a FASTA file.', description='Simulates sequencing with a specified read length from sequences specified by a FASTA file.',
packages=find_packages(), packages=find_packages(),
install_requires=['random','Bio','argparse','logging'], install_requires=['Bio','argparse'],
entry_points={'console_scripts': ['read_sequencer=read_sequencer_package.cli:main']} entry_points={'console_scripts': ['readsequencer=readsequencer.cli:main']}
) )
>1|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 481 bp
tgagcactcggtgccaagggcggggatacacagatggttggctgatacaaccgggactta
aattccctagactagatctgtgttggaacgcctctctacg
>2|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 495 bp
ctgaatcaggtgtaggttctttttacgtcgtttaaggagctacacggtatcttgttttca
gttaaggtgccacacccccgggtggatcatccgtcagctt
>3|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 193 bp
acttcagtactggaaggatctaggaaccattaatgcgagtgtggtgacgccagacgaccc
ccggtgttctgccaccttctttggataggagaaccgtcac
>4|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 625 bp
acgtctggagcgtgggttgacccctgtacatggttctttccggatccttaacgtgccgat
acaactcaaaggtaactgtgcttaccacttccgaagctac
>5|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 845 bp
agagcgtacggcgcgcatcgtataccctacgagggcggcgtgtggaggaacgctgggctg
acactgtagaagattagatacacttgtccctaaaattaac
>6|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 703 bp
tgcagtcgatgtgctattcgttttaggcagtctacgcgcttagtaactcccacggccata
gacttatctcagacatggaccatgtcgatatcggacgccg
>7|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 243 bp
actctttagaatgggtttcactaatagtacgtgcatacaatttcgtcagaaagggcgctt
gctaagggacacggatcaatgatgaccagacttatggtgt
>8|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 863 bp
attggcccggtccaggacagagccttatattgctactggtatgagaaccgttctgacgta
aacttgatggctttacgcctgcacgggcttcatacacaca
>9|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 494 bp
aagcgaaactcctagaacttcccatcaggcaatcgtgtcccacgaagcacggatactacg
ggcactagttgaatggggggtttttttcgtaggtcgtaat
>10|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 86 bp
atcctagcgccaaagatttactgttatggggtcgacgaacactagccgataatgccgtcc
tgggatctctagcctagtattatgcgTCTTCGGAGCAGGG
>11|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 360 bp
cgcctgagggtcctaaatctgacgtatgatcgaagagattggaaggtcccggcgggtcac
cccacgttgcgatcatggccaaggccatggtttgctcaaa
>12|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 140 bp
gaattcctggggatttactcacccccgaggcggacaagatttccagctggatcaccgagg
gttacttaatcccttcgatgctttcaaaggccctaatcag
>13|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 832 bp
aatactctcgttgaagcgtcggacagtaaagtgagagatttcggcccacggtagtcggac
attctcagtggggagcgaagagttgcgcttagagccgacg
>14|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 296 bp
atcggggtgcgaaatcccctgagctggttgactacatacgtaaccacgttccgtgcgtca
tctaagcgtatcggctcatactggtggtaactagacttgg
>15|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 515 bp
accttcaatttgttcgcccgggacaagtagaaattactgtaaactaaacttaacctattc
cttgttaaagtccgcaccaagtgtactgtaagaatggtcg
>16|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 820 bp
ccggctcaatcctgtagaaccgcgtacaacacacccaagctataccgcacacggcgcctt
agcaaccactgcttatctgcgtattatacctttacaatca
>17|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 791 bp
attgttagggcctgtccggaaaagatcaacggaagatattcaccagcacctatgctgact
cacgtagttcccgacgttcagtcccctccaacgtggaagg
>18|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 328 bp
accgattacaggcagtcggccttgtccgctcgtatatccagggatgttccaccgaaagtg
ggagtgtggcacttattggtaaaaggcatttttacgaacg
>19|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp
ggagtggaaaattctgtagtccgttggcggcgaccgcaaaccagaataatatggtcacgt
taggccctcgggccccttcatatgtacggagtcattgaat
>20|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 440 bp
atcttaaacagcccaatcggctcgccgaccaatttcccgcttcacagtacgcggaagaat
ctgcagatagaagtcagccctctcacgtcaataggaatgc
>21|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 840 bp
cataactcgtgagtggccctgtacaagtcattgcatcacaatccttgcaatttgctcctt
tggccaagcgtacaagaccccggacccatacgctcccggc
>22|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 234 bp
caccgcgaaagtgactcagttttcccggtcttatcacggtcgttgtcgtccagattccgg
ttgttaactgcgggagctataacacttattccttactgcg
>23|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 917 bp
atcaagtgattacctggtaacccgccgctcttgcagtgttcaccctttgtgtcgtcttag
tgtttgtacacgttaaggaaaagcgttagcttaaccatta
>24|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 676 bp
cacacggcatcgcaaagcgagctatccagagatgatacatgtggttgaaggtgattgcgt
caacatgggggttgctcagtttggttggtcaatcaacggt
>25|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 870 bp
ctgatcaccaatagcttgcgcttaacacacgcgccttacaattatatgacgcccttgcca
atgacagatagagccattaatcgtggaaaccaggcattta
>26|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 751 bp
gggtgcgttatggggactaaagactgttactaccggtactccgccttatagagccgtcac
gtattaatcagctatcaacagatactatcgtcacagccct
>27|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 574 bp
gtactgcaccttgcactgctatctacaatgccgagggtcgccctagtgctttgcatgttt
ggcctctacctacgagtctacgcgggcgtttttaagcaag
>28|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 169 bp
agctccctaaacaacacccgcgtaaaaccttcagttatggtgccgactaaccctgtggat
gtcttagcgctctcgttccgatgggtgctgatactagtaa
>29|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 408 bp
tgcagtgatgcatcgataagaccgcatagttacctccttacaggtgacgctaggctaatt
gggagtgctggcacttgtgccctacagtcaagcgctcacg
>30|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 52 bp
caaagcgattcgggttaacgcacttaagagttcgacgtaggttagtcccctcCTTTTAGG
TGTCTAACTAGAGAGGCGCCCTATTGGGCTCAGGATGACG
>31|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 581 bp
tgctctgacgtgtaagcgccttcgataacgtctttgcagcgccccacaaagtaaggaccg
gtctaacagggcttccgaatcaatagactgatagtaatgg
>32|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp
gcggaactacctctctaagaccgcacaacaagtgtagtagatgaagatcacgcagagtgc
tcggcactgcatttttatacgtcgaatcagaaacgaggtt
>33|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 297 bp
gccctcgcgccagcttacttttagaaaacatcgaccggtaagagatacctgggtgagctg
ggcttcacgacatgttcttaaatcaatactctaaatctgc
>34|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 573 bp
gcctaggggtcttgaccacagggagtacgagcattgatcattggagcaggtggctaatat
tgatagtggttagaccaccggcgcatcatcgtacgagcgc
>35|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 559 bp
gaaaaagtcgccccattcagttacaatcgtcttcagaagccagctcggttggggctatct
gcggggtaatgcaacagggggctaccagacggtaaaccag
>36|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 187 bp
ctaagtccttatctatgatgcatctttcgttactgcgacaatatccgagacgagcagagt
tacacgccgaggtgtaaacgaatacgattgctatatgcaa
>37|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 549 bp
ttcatatggggatttggaatcgggtttgtgcggaatatgcccacgagactgcttatgtca
acgagacgacccattgtcacgttgtaaggccaccaataac
>38|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 916 bp
gtggcctaccataaatcaatttgggttaacgctctttgatctacgcactatgttgattca
cttaccccttgtcaccgggcagaagagagccagtttaggt
>39|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 848 bp
accccggtcgctttggccggtcgtagccctaatcaattctgttcgtatcactaaagtaac
ggtttgaaatcctttgcaaacttgatctgggtatatgaac
>40|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 289 bp
agagcaaaagaaagtctgctccgcgtgacacacttgctcgttgtagtaactgcacgcgcc
gtctactcgacagggaccccccgtcggttcctctctatag
>41|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 642 bp
ctggggaatgaccgtaccgatctaattccccgtcgaaaaacttatgacgcgcagttgtcc
ttatgcttgagacatgaatccttgccccatattggcgatc
>42|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 993 bp
gcaagaagccaaaaaccttgcaggaggtcatttaagtttacccgcgcataagcagagacg
gacctctctgagatctcgcaccgcgcgcccggccggcact
>43|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 473 bp
ggttgtccaggcgcgagcaagtagctgactcgctaatcttaacgagtattgcttaggact
tccaaatactccaagacgtcaatacgctttatctttgtga
>44|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 272 bp
gtagaacttgttccccatggacaatgctagttccgttaatgccaggtattcatgtgccaa
gcgcctgcctggggaatacgagcctctctacaaacttacg
>45|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 860 bp
aaaagcatcactctaacgacgctaccgtctgaatagatcaagattgctatcggttcgacc
ttgatcgcatgtgaacccgcccaaaaacccgtctcgacaa
>46|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 884 bp
aagcctctacaggctctgcggtttggctttacttaacggtgagtcaggaaaacattactg
ctacgttcaccgtgttcagagatagagagtacattaggga
>47|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 888 bp
gcgccttgaagaggcgaggtctaaaggcaaaaatttagatccgccctatgagacggccga
cgcggagaattccctaaccactattgtcctctgcatcgat
>48|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 588 bp
catcaagatgggttacgtaggaccgagattcagtctctgggttagagccgacagcggggc
cgctacatagtacacggcgaggaatgcggggttgggctga
>49|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 626 bp
taacctcagtctcgttcccccctcggtagttcggacccttattcgcttatctcacattca
tcactgtagaccaaggaccgggcatacttgcggatatcta
>50|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 214 bp
taactgtcggtcactgctcatcccgactagttcggctcactagacttactcgcggaagcg
agaagtaggacgtcgtgtaatactccaacgtcgttacgca
import pytest
from readsequencer.read_sequencer import ReadSequencer
sequencer = ReadSequencer()
def test_chunksize():
    """A sequencer built without arguments uses a chunk size of 10000."""
    expected_chunk_size = 10000
    assert sequencer.chunk_size == expected_chunk_size
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment