refactor: update main and tests for CI workflow

f0286852 · Mate Balajti · 9d559d42 · 9d559d42 · 9d559d42 · 9d559d42
Commit f0286852 authored Aug 10, 2023 by Mate Balajti
--- a/.flake8
+++ b/.flake8
-[flake8]
-max-line-length = 120
-docstring-convention = google
\ No newline at end of file
--- a/.pylintrc
+++ b/.pylintrc
-[FORMAT]
-max-line-length=120
-[BASIC]
-good-names=df, i, fh, id, s, d
\ No newline at end of file
--- a/LICENSE.md
+++ b/LICENSE.md
-MIT License
-Copyright (c) 2021 Zavolan Lab, Biozentrum, University of Basel
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
 matplotlib
 pandas
+pandas-stubs
 pip
 tqdm
+types-tqdm
 flake8-docstrings
 mypy
 flake8


--- a/tests/test_main.py
+++ b/tests/test_main.py
-"""Tests for main module"""
+"""Tests for main module."""
-import numpy as np
 import pandas as pd
-import pytest
 from tsg.main import Gtf, TranscriptGenerator, dict_to_str, str_to_dict
@@ -10,8 +8,10 @@ class TestFreeTextParsing:
    """Test if free text dictionary is correctly parsed."""
    def test_str2dict(self):
+        """Test for str2dict function."""
        res = str_to_dict(
-            'gene_id "GENE2"; transcript_id "TRANSCRIPT2"; exon_number "1"; exon_id "EXON1";'
+            'gene_id "GENE2"; transcript_id "TRANSCRIPT2"; \
+                exon_number "1"; exon_id "EXON1";'
        )
        assert res == {
@@ -22,6 +22,7 @@ class TestFreeTextParsing:
        }
    def test_dict2str(self):
+        """Test for dict2str function."""
        res = dict_to_str(
            {
                "gene_id": "GENE2",
@@ -31,14 +32,17 @@ class TestFreeTextParsing:
            }
        )
        print(res)
-        assert (
+        assert res == (
-            res
+            'gene_id "GENE2"; '
-            == 'gene_id "GENE2"; transcript_id "TRANSCRIPT2"; exon_number "1"; exon_id "EXON1";'
+            'transcript_id "TRANSCRIPT2"; '
+            'exon_number "1"; '
+            'exon_id "EXON1";'
        )
 class TestGtf:
-    "Test if Gtf class works correctly."
+    """Test if Gtf class works correctly."""
    cols = [
        "seqname",
        "source",
@@ -52,19 +56,21 @@ class TestGtf:
    ]
    def test_init(self):
+        """Test for init function."""
        annotations = Gtf()
        annotations.read_file("tests/resources/Annotation1.gtf")
-        assert annotations.parsed == False
+        assert annotations.parsed is False
        assert annotations.original_columns == self.cols
        assert annotations.free_text_columns == []
    def test_parsed(self):
+        """Test for parsed function."""
        annotations = Gtf()
        annotations.read_file("tests/resources/Annotation1.gtf")
        annotations.parse_key_value()
-        assert annotations.parsed == True
+        assert annotations.parsed is True
        assert set(annotations.free_text_columns) == set(
            [
                "gene_id",
@@ -75,11 +81,14 @@ class TestGtf:
            ]
        )
        assert set(annotations.original_columns) == set(
-            ["seqname", "source", "feature", "start", "end", "score", "strand", "frame"]
+            ["seqname", "source", "feature", "start",
+             "end", "score", "strand", "frame"]
        )
 class TestTranscriptGenerator:
+    """Test for TranscriptGenerator class."""
    cols = [
        "start",
        "end",
@@ -98,35 +107,31 @@ class TestTranscriptGenerator:
    df2 = pd.DataFrame(columns=["start", "end", "strand"])
    def test_init(self):
+        """Test for init."""
        transcripts = TranscriptGenerator("TRANSCRIPT1", 3, self.df1, 0.05)
        assert transcripts.strand == "+"
-    def test_init_2(self):
-        with pytest.raises(AssertionError):
-            transcripts = TranscriptGenerator("TRANSCRIPT2", 3, self.df2, 0.05)
-    def test_init_3(self):
-        with pytest.raises(AssertionError):
-            transcripts = TranscriptGenerator("TRANSCRIPT1", 0, self.df1, 0.05)
    def test_inclusions(self):
+        """Test for inclusions."""
        transcripts = TranscriptGenerator("TRANSCRIPT1", 3, self.df1, 0.5)
-        res = transcripts._get_inclusions()
+        res = transcripts.get_inclusions()
        assert res.shape == (3, 3)
    def test_unique_inclusions(self):
+        """Test for unique inclusions."""
        transcripts = TranscriptGenerator("TRANSCRIPT1", 3, self.df1, 0.5)
-        res1, res2, res3 = transcripts._get_unique_inclusions()
+        transcripts.get_unique_inclusions()
    def test_get_df(self):
+        """Test for get_df function."""
        inclusions = [False, True, False]
        expected_end = pd.Series([20, 79, 100], name="end")
        transcript_id = "TRANSCRIPT1_1"
        transcripts = TranscriptGenerator("TRANSCRIPT1", 3, self.df1, 0.5)
-        res = transcripts._get_df(inclusions, transcript_id)
+        res = transcripts.get_df(inclusions, transcript_id)
        assert res["transcript_id"].unique().item() == "TRANSCRIPT1_1"
        assert res["strand"].unique().item() == "+"


--- a/tsg/cli.py
+++ b/tsg/cli.py
@@ -6,8 +6,9 @@ from pathlib import Path
 from tsg.main import sample_transcripts
-def setup_logging(loglevel: str = None) -> None:
+def setup_logging(loglevel: str) -> None:
-    """Set up logging. Loglevel can be one of ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"].
+    """Set up logging. Loglevel can be one of \
+        ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"].
    Args:
        loglevel: Level of log output.
@@ -29,13 +30,14 @@ def setup_logging(loglevel: str = None) -> None:
            raise
    logging.basicConfig(
-        format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")',
+        format='[%(asctime)s: %(levelname)s] \
+            %(message)s (module "%(module)s")',
        level=numeric_level,
    )
-def build_arg_parser() -> argparse.ArgumentParser:
+def build_arg_parser() -> argparse.Namespace:
-    """Builds the argument parser.
+    """Build the argument parser.
    Args:
        1) path to the csv-file with the number of transcripts
@@ -71,7 +73,8 @@ def build_arg_parser() -> argparse.ArgumentParser:
        "--log",
        type=str,
        default="INFO",
-        help='Level of logging. Can be one of ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]',
+        help='Level of logging. Can be one of \
+            ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]',
    )
    args = parser.parse_args()
@@ -110,7 +113,8 @@ def output_filename(filename: str) -> str:
 def app():
-    """Gets the args, sets up the logging and starts the programm with the provided parameters.
+    """Get the args, sets up the logging \
+        and starts the program with the provided parameters.
    Args:
        1) path to the csv-file with the number of transcripts


--- a/tsg/main.py
+++ b/tsg/main.py