Merge branch 'dev' into 'main'

refactor: update tests, CI, main script

See merge request !40

Merge branch 'dev' into 'main'
d686c170 · Mate Balajti · 586ebf6e · 7b1148fc · d686c170 · 586ebf6e
Commit d686c170 authored 1 year ago by Mate Balajti
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -6,7 +6,6 @@ default:         # Set default
 stages:          # List of stages for jobs, and their order of execution
  - build
  - test
-  - deploy

 build-job:       # This job runs in the build stage, which runs first.
  stage: build
@@ -30,12 +29,6 @@ lint-test-job:   # This job also runs in the test stage.
    - pip install -r requirements.txt
    - pip install -r requirements-dev.txt
    - pip install -e .
-    - flake8 --docstring-convention google --max-line-length 120 readsequencer/ --ignore=D212,D103,D104,D107,D100,D017,D415
-    #- pylint readsequencer/ tests/
-
-deploy-job:      # This job runs in the deploy stage.
-  stage: deploy  # It only runs when *both* jobs in the test stage complete successfully.
-  environment: production
-  script:
-    - echo "Deploying application..."
-    - echo "Application successfully deployed."
+    - flake8 --docstring-convention google readsequencer/ tests/
+    - pylint readsequencer/ tests/
+    - mypy readsequencer/ tests/
\ No newline at end of file
--- a/Dockerfile
+++ b/Dockerfile
-FROM python:3.11.1-slim-bullseye
-
-# MAINTAINER add_maintainer
-MAINTAINER "Christoph Harmel"
-# set names for user, group and user home
-ARG USER="bioc"
-ARG GROUP="bioc"
-ARG WORKDIR="/home/${USER}"
-
-# create user, group, user home & makes the user and group
-# the owner of the user home
-RUN mkdir -p $WORKDIR \
-  && groupadd -r $GROUP \
-  && useradd --no-log-init -r -g $GROUP $USER \
-  && chown -R ${USER}:${GROUP} $WORKDIR \
-  && chmod 700 $WORKDIR
-
-# set the user, make sure the location where pip
-# installs binaries/executables is part of the $PATH
-# and set the working directory to the user's home
-USER $USER
-ENV PATH="${WORKDIR}/.local/bin:${PATH}"
-WORKDIR $WORKDIR
-
-# copy entire content of the current directory to the
-# working directory; ensure that this does not contain any
-# files that you don't want to have end up in the Docker
-# image, especially not any secrets!
-# use `.dockerignore` to set files that you do NOT want
-# Docker to copy
-COPY --chown=${USER}:${GROUP} . $WORKDIR
-
-# install app and development dependencies
-# assumes that dependencies in `requirements.txt` are
-# automatically installed when installing the app; if
-# that is not the case, they need to be installed
-# _before_ installing the app
-RUN pip install . \
-  && pip install --no-cache-dir -r requirements-dev.txt
-
-# set default command; optional
-CMD ["readsequencer"]
--- a/readsequencer/__init__.py
+++ b/readsequencer/__init__.py
+"""Initialise read-sequencer."""
--- a/readsequencer/cli.py
+++ b/readsequencer/cli.py
+"""Receive command line arguments."""
 import argparse
 import logging
 from readsequencer.read_sequencer import ReadSequencer

+logging.basicConfig(
+    format='[%(asctime)s: %(levelname)s] %(message)s \
+        (module "%(module)s")',
+    level=logging.INFO,
+)
+logger = logging.getLogger(__name__)
+
 LOG = logging.getLogger(__name__)


 def main():
+    """Use CLI arguments to simulate sequencing."""
    parser = argparse.ArgumentParser(
        prog="readsequencer",
-        description="Simulates sequencing of DNA sequences specified by an FASTA file.",
+        description="Simulates sequencing of DNA sequences specified \
+            by an FASTA file.",
+    )
+    parser.add_argument(
+        "output",
+        help="path to FASTA file"
+    )
+    parser.add_argument(
+        "-i",
+        "--input",
+        default=None,
+        help="path to FASTA file"
    )
-
-    parser.add_argument("output", help="path to FASTA file")
-    parser.add_argument("-i", "--input", default=None, help="path to FASTA file")
    parser.add_argument(
-        "-r", "--read-length", default=100, help="read length for sequencing", type=int
+        "-r",
+        "--read-length",
+        type=int,
+        default=100,
+        help="read length for sequencing",
    )
    parser.add_argument(
        "-n",
        "--n_random",
        default=100,
        type=int,
-        help="n random sequences. Just used if input fasta file is not specified.",
+        help="n random sequences. Just used if input "
+             "fasta file is not specified.",
    )
    parser.add_argument(
        "-s",
@@ -30,7 +52,6 @@ def main():
        type=int,
        help="chunk_size for batch processing",
    )
-
    args = parser.parse_args()
    LOG.info("Read sequencer started.")
    if args.input is not None:
@@ -55,10 +76,5 @@ def main():
    LOG.info("Read sequencer finished.")


-if __name__ == "__main__":
-    logging.basicConfig(
-        format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")',
-        level=logging.INFO,
-    )
-    LOG = logging.getLogger(__name__)
+if __name__ == '__main__':
    main()
--- a/readsequencer/read_sequencer.py
+++ b/readsequencer/read_sequencer.py
+"""Main module for read sequencer."""
 from random import choices
 from collections.abc import Generator, Iterator
-from Bio import SeqIO
-from Bio.Seq import Seq
-from Bio.SeqRecord import SeqRecord
+from Bio import SeqIO  # type: ignore
+from Bio.Seq import Seq  # type: ignore
+from Bio.SeqRecord import SeqRecord  # type: ignore


 class ReadSequencer:
-    """ReadSequencer class
+    """ReadSequencer class.

    Args:
        fasta: path fasta file
@@ -22,23 +23,22 @@ class ReadSequencer:

    def __init__(
        self,
-        fasta: str = None,
-        output: str = None,
+        fasta=None,
+        output=None,
        read_length: int = 150,
        chunk_size: int = 10000,
    ) -> None:
-
+        """Initialise class."""
        self.fasta = fasta
        self.output = output
        self.read_length = read_length
        self.chunk_size = chunk_size
        self.random = False
        self.bases = ("A", "T", "C", "G")
-        self.n_sequences = None
+        self.n_sequences: int

    def get_n_sequences(self) -> None:
-        """
-        Helper function to detect number of sequences present in set fasta file.
+        """Detect number of sequences present in set fasta file.

        Returns:
             None
@@ -46,8 +46,7 @@ class ReadSequencer:
        self.n_sequences = len(list(SeqIO.parse(self.fasta, "fasta")))

    def define_random_sequences(self, n_seq: int) -> None:
-        """
-        Defines random sequences.
+        """Define random sequences.

        Args:
             n_seq: number of random sequences to be generated
@@ -59,8 +58,7 @@ class ReadSequencer:
        self.n_sequences = n_seq

    def generate_random_sequence(self, length: int) -> Seq:
-        """
-        Generates random sequence.
+        """Generate random sequence.

        Args:
            length: length of sequence
@@ -73,7 +71,7 @@ class ReadSequencer:
        return seq

    def resize_sequence(self, record: SeqRecord) -> SeqRecord:
-        """Resizes sequence
+        """Resize sequence.

        Resizes sequence according to set read length. If sequence is
         shorter than read length, fills up with random nucleotides.
@@ -93,7 +91,7 @@ class ReadSequencer:
        return record.seq

    def batch_iterator(self, iterator: Iterator, batch_size: int) -> Generator:
-        """Generates batch iterator.
+        """Generate batch iterator.

        This is a generator function, and it returns lists of the
        entries from the supplied iterator.  Each list will have
@@ -114,7 +112,7 @@ class ReadSequencer:
                batch = []

    def run_sequencing(self) -> None:
-        """Runs sequencing.
+        """Run sequencing.

        Runs read sequencing of specified sequences from input fasta file or
         generates random sequences for a given read length. If number of
@@ -125,7 +123,7 @@ class ReadSequencer:
        """
        if self.random:
            if self.n_sequences <= self.chunk_size:
-                with open(self.output, "w") as output_handle:
+                with open(self.output, "w", encoding="utf-8") as output_handle:
                    for i in range(self.n_sequences):
                        record = SeqRecord(
                            self.generate_random_sequence(self.read_length),
@@ -134,35 +132,44 @@ class ReadSequencer:
                        SeqIO.write(record, output_handle, "fasta")
            else:
                batch_generator = self.batch_iterator(
-                    range(self.n_sequences), self.chunk_size
+                    iter(range(self.n_sequences)), self.chunk_size
                )
                for i, batch in enumerate(batch_generator):
-                    filename = self.output.replace(".fasta", "") + "_chunk_%i.fasta" % (
-                        i + 1
+                    filename = (
+                        self.output.replace(".fasta", "") +
+                        f"_chunk_{i + 1}.fasta"
                    )
-                    with open(filename, "w") as output_handle:
-                        for j, k in enumerate(batch):
+                    with open(
+                        filename, "w", encoding="utf-8"
+                    ) as output_handle:
+                        for j, _ in enumerate(batch):
                            record = SeqRecord(
-                                self.generate_random_sequence(self.read_length),
+                                self.generate_random_sequence(
+                                    self.read_length
+                                ),
                                id="random_seq: " + str(j + 1),
                            )
                            SeqIO.write(record, output_handle, "fasta")
        else:
            if self.n_sequences <= self.chunk_size:
-                with open(self.fasta) as input_handle, open(
-                    self.output, "w"
+                with open(self.fasta, encoding="utf-8") as input_handle, open(
+                    self.output, "w", encoding="utf-8"
                ) as output_handle:
                    for record in SeqIO.parse(input_handle, "fasta"):
                        record.seq = self.resize_sequence(record)
                        SeqIO.write(record, output_handle, "fasta")

            else:
-                record_iter = SeqIO.parse(open(self.fasta), "fasta")
-                for i, batch in enumerate(
-                    self.batch_iterator(record_iter, self.chunk_size)
-                ):
-                    filename = self.output.replace(".fasta", "") + "_chunk_%i.fasta" % (i + 1)
-                    for j, record in enumerate(batch):
-                        batch[j].seq = self.resize_sequence(record)
-                    with open(filename, "w") as handle:
-                        SeqIO.write(batch, handle, "fasta")
+                with open(self.fasta, encoding="utf-8") as file:
+                    record_iter = SeqIO.parse(file, "fasta")
+                    for i, batch in enumerate(
+                        self.batch_iterator(record_iter, self.chunk_size)
+                    ):
+                        filename = (
+                            self.output.replace(".fasta", "") +
+                            f"_chunk_{i + 1}.fasta"
+                        )
+                        for j, record in enumerate(batch):
+                            record.seq = self.resize_sequence(record)
+                        with open(filename, "w", encoding="utf-8") as handle:
+                            SeqIO.write(batch, handle, "fasta")
--- a/setup.py
+++ b/setup.py
-from setuptools import setup, find_packages
+"""Setup tool."""
 from pathlib import Path
+from setuptools import setup, find_packages  # type: ignore
+
 project_root_dir = Path(__file__).parent.resolve()
-with open(project_root_dir / "requirements.txt", "r", encoding="utf-8") as _file:
+with open(
+    project_root_dir / "requirements.txt", "r", encoding="utf-8"
+) as _file:
    INSTALL_REQUIRES = _file.read().splitlines()

+URL = ('https://git.scicore.unibas.ch/zavolan_group/'
+       'tools/read-sequencer')
+
 setup(
    name='readsequencer',
    version='0.1.1',
-    url='https://git.scicore.unibas.ch/zavolan_group/tools/read-sequencer',
+    url=URL,
    license='MIT',
    author='Clara Serger, Michael Sandholzer and Christoph Harmel',
    author_email='christoph.harmel@unibas.ch',
-    description='Simulates sequencing with a specified read length from sequences specified by a FASTA file.',
+    description='Simulates sequencing with a specified read length from '
+                'sequences specified by a FASTA file.',
    packages=find_packages(),
    install_requires=INSTALL_REQUIRES,
-    entry_points={'console_scripts': ['readsequencer=readsequencer.cli:main']}
+    entry_points={
+        'console_scripts': [
+            'readsequencer=readsequencer.cli:main'
+            ]
+        }
 )
--- a/tests/__init__.py
+++ b/tests/__init__.py
+"""Initialise testing."""
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
-import readsequencer.cli
+"""Test cli.py."""
 import pytest
-from cli_test_helpers import ArgvContext, shell
-import os
-import glob
+from cli_test_helpers import ArgvContext, shell  # type: ignore
+import readsequencer.cli
+
+
 def test_entrypoint():
-    """
-    Is entrypoint script installed? (setup.py)
-    """
+    """Test if entrypoint script is installed (setup.py)."""
    result = shell('readsequencer --help')
    assert result.exit_code == 0

+
 def test_usage_no_args():
-    """
-    Does CLI abort w/o arguments, displaying usage instructions?
-    """
+    """Test if CLI aborts w/o arguments, displaying usage instructions."""
    with ArgvContext('readsequencer'), pytest.raises(SystemExit):
        readsequencer.cli.main()


--- a/tests/test_read_sequencer.py
+++ b/tests/test_read_sequencer.py
-import pytest
+"""Test read_sequencer.py."""
 import os
 import glob
 from readsequencer.read_sequencer import ReadSequencer

+
 def test_init_default():
+    """Test default initiation."""
    sequencer = ReadSequencer()
    assert sequencer.fasta is None
    assert sequencer.read_length == 150
@@ -13,6 +15,7 @@ def test_init_default():


 def test_run_random():
+    """Test random run."""
    sequencer = ReadSequencer(
        output="./tests/fasta_testfile/results.fasta")
    sequencer.define_random_sequences(n_seq=100)
@@ -23,7 +26,9 @@ def test_run_random():
    sequencer.run_sequencing()
    os.remove("./tests/fasta_testfile/results.fasta")

+
 def test_run_random_chunks():
+    """Test random run chunks."""
    # setup class
    sequencer = ReadSequencer(
        output="./tests/fasta_testfile/results.fasta",
@@ -44,6 +49,7 @@ def test_run_random_chunks():


 def test_run_sequencing():
+    """Test sequencing run."""
    sequencer = ReadSequencer(
        fasta="./tests/fasta_testfile/50_seqs_50_1000_bp.fasta",
        output="./tests/fasta_testfile/results.fasta",
@@ -59,7 +65,9 @@ def test_run_sequencing():
    for file in result_file:
        os.remove(file)

+
 def test_run_sequencing_chunks():
+    """Test run sequencing chunks."""
    # setup class
    sequencer = ReadSequencer(
        fasta="./tests/fasta_testfile/50_seqs_50_1000_bp.fasta",
@@ -78,6 +86,3 @@ def test_run_sequencing_chunks():
    assert len(result_files) == 5
    for file in result_files:
        os.remove(file)
-
-
-