diff --git a/readsequencer/read_sequencer.py b/readsequencer/read_sequencer.py index 0283b3bde78973bc81c85895cff7523f6b8047ca..583bde244fd6d23751e64fbc2266f793846ec865 100644 --- a/readsequencer/read_sequencer.py +++ b/readsequencer/read_sequencer.py @@ -23,8 +23,8 @@ class ReadSequencer: def __init__( self, - fasta: str = None, - output: str = None, + fasta: str, + output: str, read_length: int = 150, chunk_size: int = 10000, ) -> None: @@ -35,7 +35,7 @@ class ReadSequencer: self.chunk_size = chunk_size self.random = False self.bases = ("A", "T", "C", "G") - self.n_sequences = None + self.n_sequences: int def get_n_sequences(self) -> None: """Detect number of sequences present in set fasta file. @@ -135,11 +135,18 @@ class ReadSequencer: range(self.n_sequences), self.chunk_size ) for i, batch in enumerate(batch_generator): - filename = self.output.replace(".fasta", "") + f"_chunk_{i}.fasta" % (i + 1) - with open(filename, "w", encoding="utf-8") as output_handle: - for j, k in enumerate(batch): + filename = ( + self.output.replace(".fasta", "") + + f"_chunk_{i + 1}.fasta" + ) + with open( + filename, "w", encoding="utf-8" + ) as output_handle: + for j, _ in enumerate(batch): record = SeqRecord( - self.generate_random_sequence(self.read_length), + self.generate_random_sequence( + self.read_length + ), id="random_seq: " + str(j + 1), ) SeqIO.write(record, output_handle, "fasta") @@ -153,12 +160,16 @@ class ReadSequencer: SeqIO.write(record, output_handle, "fasta") else: - record_iter = SeqIO.parse(open(self.fasta, encoding="utf-8"), "fasta") - for i, batch in enumerate( - self.batch_iterator(record_iter, self.chunk_size) - ): - filename = self.output.replace(".fasta", "") + "_chunk_%i.fasta" % (i + 1) - for j, record in enumerate(batch): - batch[j].seq = self.resize_sequence(record) - with open(filename, "w") as handle: - SeqIO.write(batch, handle, "fasta") + with open(self.fasta, encoding="utf-8") as file: + record_iter = SeqIO.parse(file, "fasta") + for i, 
batch in enumerate( + self.batch_iterator(record_iter, self.chunk_size) + ): + filename = ( + self.output.replace(".fasta", "") + + f"_chunk_{i + 1}.fasta" + ) + for j, record in enumerate(batch): + batch[j].seq = self.resize_sequence(record) + with open(filename, "w", encoding="utf-8") as handle: + SeqIO.write(batch, handle, "fasta") diff --git a/setup.py b/setup.py index 7974becdd617c17eca21cd142401835b0de74dd3..23ec8cccb57f5ca0174872100d125da514c33c0e 100644 --- a/setup.py +++ b/setup.py @@ -14,8 +14,8 @@ setup( license='MIT', author='Clara Serger, Michael Sandholzer and Christoph Harmel', author_email='christoph.harmel@unibas.ch', - description='Simulates sequencing with a specified read length from \ - sequences specified by a FASTA file.', + description='Simulates sequencing with a specified read length from ' + 'sequences specified by a FASTA file.', packages=find_packages(), install_requires=INSTALL_REQUIRED, entry_points={'console_scripts': ['readsequencer=readsequencer.cli:main']}