diff --git a/readsequencer/__init__.py b/readsequencer/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..bcc1665e55c64d2ec9dc148a5666b910b7e44c36 100644 --- a/readsequencer/__init__.py +++ b/readsequencer/__init__.py @@ -0,0 +1 @@ +"""Initialise read-sequencer.""" diff --git a/readsequencer/cli.py b/readsequencer/cli.py index cc74c44cef025035b96a8963e79b3c89b280a4c2..ea00a8b9f84117e2b1bb8c06f422b0c7fdd4f5c5 100644 --- a/readsequencer/cli.py +++ b/readsequencer/cli.py @@ -1,27 +1,40 @@ +"""Receive command line arguments.""" import argparse import logging from readsequencer.read_sequencer import ReadSequencer +logging.basicConfig( + format='[%(asctime)s: %(levelname)s] %(message)s \ + (module "%(module)s")', + level=logging.INFO, +) +logger = logging.getLogger(__name__) + LOG = logging.getLogger(__name__) def main(): + """Use CLI arguments to simulate sequencing.""" parser = argparse.ArgumentParser( prog="readsequencer", - description="Simulates sequencing of DNA sequences specified by an FASTA file.", + description="Simulates sequencing of DNA sequences specified \ + by an FASTA file.", ) parser.add_argument("output", help="path to FASTA file") - parser.add_argument("-i", "--input", default=None, help="path to FASTA file") parser.add_argument( - "-r", "--read-length", default=100, help="read length for sequencing", type=int + "-i", "--input", default=None, help="path to FASTA file") + parser.add_argument( + "-r", "--read-length", default=100, + help="read length for sequencing", type=int ) parser.add_argument( "-n", "--n_random", default=100, type=int, - help="n random sequences. Just used if input fasta file is not specified.", + help="n random sequences. 
Just used if input \ + fasta file is not specified.", ) parser.add_argument( "-s", @@ -55,10 +68,5 @@ def main(): LOG.info("Read sequencer finished.") -if __name__ == "__main__": - logging.basicConfig( - format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")', - level=logging.INFO, - ) - LOG = logging.getLogger(__name__) +if __name__ == '__main__': main() diff --git a/readsequencer/read_sequencer.py b/readsequencer/read_sequencer.py index 6cd7565a114b012a468b75bf1dcefb0ee8ad316c..0283b3bde78973bc81c85895cff7523f6b8047ca 100644 --- a/readsequencer/read_sequencer.py +++ b/readsequencer/read_sequencer.py @@ -1,3 +1,4 @@ +"""Main module for read sequencer.""" from random import choices from collections.abc import Generator, Iterator from Bio import SeqIO @@ -6,7 +7,7 @@ from Bio.SeqRecord import SeqRecord class ReadSequencer: - """ReadSequencer class + """ReadSequencer class. Args: fasta: path fasta file @@ -27,7 +28,7 @@ class ReadSequencer: read_length: int = 150, chunk_size: int = 10000, ) -> None: - + """Initialise class.""" self.fasta = fasta self.output = output self.read_length = read_length @@ -37,8 +38,7 @@ class ReadSequencer: self.n_sequences = None def get_n_sequences(self) -> None: - """ - Helper function to detect number of sequences present in set fasta file. + """Detect number of sequences present in set fasta file. Returns: None @@ -46,8 +46,7 @@ class ReadSequencer: self.n_sequences = len(list(SeqIO.parse(self.fasta, "fasta"))) def define_random_sequences(self, n_seq: int) -> None: - """ - Defines random sequences. + """Define random sequences. Args: n_seq: number of random sequences to be generated @@ -59,8 +58,7 @@ class ReadSequencer: self.n_sequences = n_seq def generate_random_sequence(self, length: int) -> Seq: - """ - Generates random sequence. + """Generate random sequence. 
Args: length: length of sequence @@ -73,7 +71,7 @@ class ReadSequencer: return seq def resize_sequence(self, record: SeqRecord) -> SeqRecord: - """Resizes sequence + """Resize sequence. Resizes sequence according to set read length. If sequence is shorter than read length, fills up with random nucleotides. @@ -93,7 +91,7 @@ class ReadSequencer: return record.seq def batch_iterator(self, iterator: Iterator, batch_size: int) -> Generator: - """Generates batch iterator. + """Generate batch iterator. This is a generator function, and it returns lists of the entries from the supplied iterator. Each list will have @@ -114,7 +112,7 @@ class ReadSequencer: batch = [] def run_sequencing(self) -> None: - """Runs sequencing. + """Run sequencing. Runs read sequencing of specified sequences from input fasta file or generates random sequences for a given read length. If number of @@ -125,7 +123,7 @@ class ReadSequencer: """ if self.random: if self.n_sequences <= self.chunk_size: - with open(self.output, "w") as output_handle: + with open(self.output, "w", encoding="utf-8") as output_handle: for i in range(self.n_sequences): record = SeqRecord( self.generate_random_sequence(self.read_length), @@ -137,10 +135,8 @@ class ReadSequencer: range(self.n_sequences), self.chunk_size ) for i, batch in enumerate(batch_generator): - filename = self.output.replace(".fasta", "") + "_chunk_%i.fasta" % ( - i + 1 - ) - with open(filename, "w") as output_handle: + filename = self.output.replace(".fasta", "") + f"_chunk_{i}.fasta" % (i + 1) + with open(filename, "w", encoding="utf-8") as output_handle: for j, k in enumerate(batch): record = SeqRecord( self.generate_random_sequence(self.read_length), @@ -149,15 +145,15 @@ class ReadSequencer: SeqIO.write(record, output_handle, "fasta") else: if self.n_sequences <= self.chunk_size: - with open(self.fasta) as input_handle, open( - self.output, "w" + with open(self.fasta, encoding="utf-8") as input_handle, open( + self.output, "w", encoding="utf-8" ) 
as output_handle: for record in SeqIO.parse(input_handle, "fasta"): record.seq = self.resize_sequence(record) SeqIO.write(record, output_handle, "fasta") else: - record_iter = SeqIO.parse(open(self.fasta), "fasta") + record_iter = SeqIO.parse(open(self.fasta, encoding="utf-8"), "fasta") for i, batch in enumerate( self.batch_iterator(record_iter, self.chunk_size) ): diff --git a/setup.py b/setup.py index 8a94d0d97f65a2f1a6dd9f9ce80635f5454f7fca..7974becdd617c17eca21cd142401835b0de74dd3 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,11 @@ -from setuptools import setup, find_packages +"""Setup tool.""" from pathlib import Path +from setuptools import setup, find_packages project_root_dir = Path(__file__).parent.resolve() -with open(project_root_dir / "requirements.txt", "r", encoding="utf-8") as _file: - INSTALL_REQUIRES = _file.read().splitlines() +with open( + project_root_dir / "requirements.txt", "r", encoding="utf-8" +) as _file: + INSTALL_REQUIRED = _file.read().splitlines() setup( name='readsequencer', @@ -11,8 +14,9 @@ setup( license='MIT', author='Clara Serger, Michael Sandholzer and Christoph Harmel', author_email='christoph.harmel@unibas.ch', - description='Simulates sequencing with a specified read length from sequences specified by a FASTA file.', + description='Simulates sequencing with a specified read length from \ + sequences specified by a FASTA file.', packages=find_packages(), - install_requires=INSTALL_REQUIRES, + install_requires=INSTALL_REQUIRED, entry_points={'console_scripts': ['readsequencer=readsequencer.cli:main']} ) diff --git a/tests/test_cli.py b/tests/test_cli.py index 6cb9ae1d6146a9045255f46c32e764b2ce18c516..17d83cdaff9a2268cdf7adc31a4bd2ec06fd7d1a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,19 +1,17 @@ -import readsequencer.cli +"""Test cli.py.""" import pytest from cli_test_helpers import ArgvContext, shell -import os -import glob +import readsequencer.cli + + def test_entrypoint(): - """ - Is entrypoint script 
installed? (setup.py) - """ + """Test if entrypoint script is installed (setup.py).""" result = shell('readsequencer --help') assert result.exit_code == 0 + def test_usage_no_args(): - """ - Does CLI abort w/o arguments, displaying usage instructions? - """ + """Test if CLI aborts w/o arguments, displaying usage instructions.""" with ArgvContext('readsequencer'), pytest.raises(SystemExit): readsequencer.cli.main() diff --git a/tests/test_read_sequencer.py b/tests/test_read_sequencer.py index 7157ccd670226bfe55479379fc8fea87b90ff861..86a589dfb41546f44a5413d3306cb20db63c4cde 100644 --- a/tests/test_read_sequencer.py +++ b/tests/test_read_sequencer.py @@ -1,9 +1,11 @@ -import pytest +"""Test read_sequencer.py.""" import os import glob from readsequencer.read_sequencer import ReadSequencer + def test_init_default(): + """Test default initiation.""" sequencer = ReadSequencer() assert sequencer.fasta is None assert sequencer.read_length == 150 @@ -13,6 +15,7 @@ def test_init_default(): def test_run_random(): + """Test random run.""" sequencer = ReadSequencer( output="./tests/fasta_testfile/results.fasta") sequencer.define_random_sequences(n_seq=100) @@ -23,7 +26,9 @@ def test_run_random(): sequencer.run_sequencing() os.remove("./tests/fasta_testfile/results.fasta") + def test_run_random_chunks(): + """Test random run chunks.""" # setup class sequencer = ReadSequencer( output="./tests/fasta_testfile/results.fasta", @@ -44,6 +49,7 @@ def test_run_random_chunks(): def test_run_sequencing(): + """Test sequencing run.""" sequencer = ReadSequencer( fasta="./tests/fasta_testfile/50_seqs_50_1000_bp.fasta", output="./tests/fasta_testfile/results.fasta", @@ -59,7 +65,9 @@ def test_run_sequencing(): for file in result_file: os.remove(file) + def test_run_sequencing_chunks(): + """Test run sequencing chunks.""" # setup class sequencer = ReadSequencer( fasta="./tests/fasta_testfile/50_seqs_50_1000_bp.fasta", @@ -78,6 +86,3 @@ def test_run_sequencing_chunks(): assert len(result_files) 
== 5 for file in result_files: os.remove(file) - - -