feat: add lint job to CI

572246f7 · Mate Balajti · 7fd08564 · 572246f7 · 572246f7 · 572246f7
Commit 572246f7 authored 1 year ago by Mate Balajti
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -20,7 +20,7 @@ unit-test-job:   # This job runs in the test stage.
    - pip install -r requirements.txt
    - pip install -r requirements_dev.txt
    - pip install -e .
-    - coverage run --source term_frag_sel -m pytest
+    - coverage run --source cdna -m pytest
    - coverage report -m
 lint-test-job:   # This job also runs in the test stage.
@@ -29,6 +29,6 @@ lint-test-job:   # This job also runs in the test stage.
    - pip install -r requirements.txt
    - pip install -r requirements_dev.txt
    - pip install -e .
-    - flake8 --docstring-convention google term_frag_sel/ tests/
+    - flake8 --docstring-convention google cdna/ tests/
-    - pylint term_frag_sel/ tests/
+    - pylint cdna/ tests/
-    - mypy term_frag_sel/
+    - mypy cdna/
\ No newline at end of file
--- a/cdna/__init__.py
+++ b/cdna/__init__.py
+"""Initialise package."""
--- a/cdna/cdna.py
+++ b/cdna/cdna.py
+"""cDNA generator."""
 import warnings
 import pandas as pd
 from Bio import SeqIO
 from Bio.Seq import Seq
@@ -11,8 +11,8 @@ warnings.filterwarnings(action="ignore", category=FutureWarning)
 def complement(res: str) -> str:
-    """
+    """Return the cDNA complement of a given base pair.
-    Returns the cDNA complement of a given base pair
    Args:
        res: residue code.
@@ -28,9 +28,8 @@ def complement(res: str) -> str:
 def seq_complement(sequence: str) -> str or None:
-    """
+    """Return the corresponding cDNA sequence by finding the complementary \
-    Returns the corresponding cDNA sequence by finding the complementary
+        base pairs and returning the reversed sequence.
-    base pairs and returning the reversed sequence.
    Args:
        sequence: sequence to be converted into cDNA.
@@ -40,16 +39,17 @@ def seq_complement(sequence: str) -> str or None:
    """
    if sequence is None:
        return None
-    _ = "".join([complement(char) for char in str(sequence)])[::-1]  # reverse string
+    _ = "".join([complement(char) for char in str(sequence)])[::-1]  # reverse string # noqa: E501
    return _
 class CDNAGen:
-    """
+    """Perform the cDNA synthesis."""
-    Module that performs the cDNA synthesis.
-    """
-    def __init__(self, ifasta: str, igtf: str, icpn: str, ofasta: str, ocsv: str):
+    def __init__(
+        self, ifasta: str, igtf: str, icpn: str, ofasta: str, ocsv: str
+    ):
+        """Initialise function."""
        # inputs
        self.fasta = ifasta
        self.gtf = igtf
@@ -65,8 +65,8 @@ class CDNAGen:
        self.run()
    def run(self) -> None:
-        """
+        """Execute the cDNA workflow.
-        Executes the cDNA workflow.
        Returns: None
        """
@@ -80,7 +80,7 @@ class CDNAGen:
        self.write_csv()
    def add_records(self) -> None:
-        """Adds data records to fasta file.
+        """Add data records to fasta file.
        Adds the copy number information to the fasta records.
@@ -88,7 +88,7 @@ class CDNAGen:
        """
        self.fasta_records = []
-        for index, row in self.gtf_df.iterrows():
+        for _, row in self.gtf_df.iterrows():
            if row["complement"] is not None:
                copy_number = row["Transcript_Copy_Number"]
                for _ in range(int(copy_number)):
@@ -101,8 +101,8 @@ class CDNAGen:
                    self.fasta_records.append(record)
    def add_sequences(self) -> None:
-        """
+        """Add the sequence for a given priming site.
-        Adds the sequence for a given priming site.
        Returns: None
        """
@@ -112,8 +112,8 @@ class CDNAGen:
        )
    def add_complement(self) -> None:
-        """
+        """Add the complementary cDNA sequence.
-        Adds the complementary cDNA sequence.
        Returns: None
        """
@@ -122,7 +122,7 @@ class CDNAGen:
        )
    def read_primingsite(self, sequence: str, end: int) -> None:
-        """Read a fasta file from a given start character
+        """Read a fasta file from a given start character.
        Reads a fasta sequence with ID (sequence) and returns the
        sequence starting from the index start.
@@ -151,7 +151,7 @@ class CDNAGen:
        self.fasta_dict = {x.name: x for x in records}
    def read_csv(self) -> None:
-        """Reads a given copy number csv file
+        """Read a given copy number csv file.
        Wrapper for Pandas read_csv.
@@ -159,24 +159,27 @@ class CDNAGen:
        """
        df_csv = pd.read_csv(self.cpn, index_col=False)
-        df_csv = df_csv.reset_index()  # make sure indexes pair with number of rows
+        df_csv = df_csv.reset_index()  # make indexes pair with number of rows
        self.csv_df = df_csv
    def read_gtf(self) -> None:
        """Read and process the GTF file.
-        Reads a GTF file and determines copy numbers from normalized probabilities.
+        Reads a GTF file and determines copy numbers from
+        normalized probabilities.
        Returns: None
        """
-        # returns GTF with essential columns such as "feature", "seqname", "start", "end"
+        # returns GTF with essential columns such as
-        # alongside the names of any optional keys which appeared in the attribute column
+        # "feature", "seqname", "start", "end"
+        # alongside the names of any optional keys
+        # which appeared in the attribute column
        gtf_df = read_gtf(self.gtf)
        gtf_df["Binding_Probability"] = pd.to_numeric(
            gtf_df["Binding_Probability"]
        )  # convert to numeric
-        df_normalization_bind_probablility = gtf_df.groupby("seqname")[
+        df_norm_bind_prob = gtf_df.groupby("seqname")[
            "Binding_Probability"
        ].sum()  # extract binding probability
        count = 0
@@ -194,7 +197,7 @@ class CDNAGen:
            id_csv = str(row["seqname"]).split("_")[1]
            # Calculate Normalized_Binding_Probability and add to GTF dataframe
            gtf_df.loc[index, "Normalized_Binding_Probability"] = (
-                row["Binding_Probability"] / df_normalization_bind_probablility[id_]
+                row["Binding_Probability"] / df_norm_bind_prob[id_]
            )
            # Look up the transcript copy number in the csv dataframe
            csv_transcript_copy_number = self.csv_df.loc[
@@ -211,7 +214,7 @@ class CDNAGen:
        self.gtf_df = gtf_df
    def write_fasta(self) -> None:
-        """Writes cDNA fasta records to file.
+        """Write cDNA fasta records to file.
        Wrapper for SeqIO.write.
@@ -222,7 +225,7 @@ class CDNAGen:
        print(f"Fasta file successfully written to: {self.output_fasta}")
    def write_csv(self) -> None:
-        """Writes the copy number information to a csv file.
+        """Write the copy number information to a csv file.
        Wrapper for Pandas to_csv.
@@ -232,4 +235,5 @@ class CDNAGen:
        self.gtf_df[["cdna_ID", "Transcript_Copy_Number"]].to_csv(
            self.output_csv, index=False
        )
-        print(f"Copy number csv file successfully written to: {self.output_csv}")
+        print("Copy number csv file successfully written to: "
+              f"{self.output_csv}")