diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 397b934540596614faec566083079aa6e9f17cd9..3ff981c274b7db202d782caa50f2935d6843ed6c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,9 +3,32 @@ default: - docker image: python:3.10-slim-buster -my_tests: - # run tests +stages: # List of stages for jobs, and their order of execution + - build + - test + +build-job: # This job runs in the build stage, which runs first. + stage: build + script: + - pip install -r requirements.txt + - pip install -r requirements_dev.txt + - pip install -e . + +unit-test-job: # This job runs in the test stage. + stage: test # It only starts when the job in the build stage completes successfully. + script: + - pip install -r requirements.txt + - pip install -r requirements_dev.txt + - pip install -e . + - coverage run --source cdna -m pytest + - coverage report -m + +lint-test-job: # This job also runs in the test stage. + stage: test # It can run at the same time as unit-test-job (in parallel). script: - - pip install . - - pip install -r requirements-dev.txt - - pytest \ No newline at end of file + - pip install -r requirements.txt + - pip install -r requirements_dev.txt + - pip install -e . + - flake8 --docstring-convention google cdna/ tests/ + - pylint cdna/ tests/ + - mypy cdna/ \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 13566b81b018ad684f3a35fee301741b2734c8f4..0000000000000000000000000000000000000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml -# Editor-based HTTP Client requests -/httpRequests/ -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml diff --git a/.idea/cdna-generator.iml b/.idea/cdna-generator.iml deleted file mode 100644 index d0876a78d06ac03b5d78c8dcdb95570281c6f1d6..0000000000000000000000000000000000000000 --- a/.idea/cdna-generator.iml +++ /dev/null @@ -1,8 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<module type="PYTHON_MODULE" version="4"> - <component name="NewModuleRootManager"> - <content url="file://$MODULE_DIR$" /> - <orderEntry type="inheritedJdk" /> - <orderEntry type="sourceFolder" forTests="false" /> - </component> -</module> \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index 25bde2c3afaab1a315ae039135cb178c10118d3f..0000000000000000000000000000000000000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,14 +0,0 @@ -<component name="InspectionProjectProfileManager"> - <profile version="1.0"> - <option name="myName" value="Project Default" /> - <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true"> - <option name="ignoredPackages"> - <value> - <list size="1"> - <item index="0" class="java.lang.String" itemvalue="numpy" /> - </list> - </value> - </option> - </inspection_tool> - </profile> -</component> \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 105ce2da2d6447d11dfe32bfb846c3d5b199fc99..0000000000000000000000000000000000000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,6 +0,0 @@ -<component name="InspectionProjectProfileManager"> - <settings> - <option name="USE_PROJECT_PROFILE" value="false" /> - <version value="1.0" /> - </settings> -</component> \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index d1e22ecb89619a9c2dcf51a28d891a196d2462a0..0000000000000000000000000000000000000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,4 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" /> -</project> \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 77e11905a5f9573959d9d8e478c7fe18853d732f..0000000000000000000000000000000000000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ProjectModuleManager"> - <modules> - <module fileurl="file://$PROJECT_DIR$/.idea/cdna-generator.iml" filepath="$PROJECT_DIR$/.idea/cdna-generator.iml" /> - </modules> - </component> -</project> \ No newline at end of file diff --git a/.idea/sonarlint/issuestore/index.pb b/.idea/sonarlint/issuestore/index.pb deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 94a25f7f4cb416c083d265558da75d457237d671..0000000000000000000000000000000000000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="VcsDirectoryMappings"> - <mapping directory="$PROJECT_DIR$" vcs="Git" /> - </component> -</project> \ No newline at end of file diff --git a/cdna/__init__.py b/cdna/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..bb7d5f3186cb2e516714c06c9a5e8d2b57696586 100644 --- a/cdna/__init__.py +++ b/cdna/__init__.py @@ -0,0 +1 @@ +"""Initialise package.""" diff --git a/cdna/cdna.py b/cdna/cdna.py index d8acd5f05c684854327b61db125edc9b168c3d9a..7593fb0ed28d61762c70c084a62457d310b8b9eb 100644 --- a/cdna/cdna.py +++ b/cdna/cdna.py @@ -1,18 +1,19 @@ +"""cDNA generator.""" import warnings - -import pandas as pd -from Bio import SeqIO -from Bio.Seq import Seq -from Bio.SeqRecord import SeqRecord -from gtfparse import read_gtf +from typing import Optional, List, Dict, Any +import pandas as pd # type: ignore +from Bio import SeqIO # type: ignore +from Bio.Seq import Seq # type: ignore +from Bio.SeqRecord import SeqRecord # type: ignore +from gtfparse import read_gtf # type: ignore # ignore warnings from read_gtf warnings.filterwarnings(action="ignore", category=FutureWarning) def complement(res: str) -> str: - """ - Returns the cDNA complement of a given base pair + """Return the cDNA complement of a given base pair. + Args: res: residue code. @@ -21,16 +22,15 @@ def complement(res: str) -> str: """ translate_dict = {"A": "T", "T": "A", "U": "A", "G": "C", "C": "G"} - if res not in translate_dict.keys(): + if res not in translate_dict: print(f"Unknown character, {res}") raise ValueError return translate_dict[res] -def seq_complement(sequence: str) -> str or None: - """ - Returns the corresponding cDNA sequence by finding the complementary - base pairs and returning the reversed sequence. +def seq_complement(sequence: str) -> Optional[str]: + """Return the corresponding cDNA sequence by finding the complementary \ + base pairs and returning the reversed sequence. Args: sequence: sequence to be converted into cDNA. @@ -40,16 +40,19 @@ def seq_complement(sequence: str) -> str or None: """ if sequence is None: return None - _ = "".join([complement(char) for char in str(sequence)])[::-1] # reverse string + _ = "".join([complement(char) for char in str(sequence)])[::-1] # reverse string # noqa: E501 return _ +# pylint: disable=R0902 class CDNAGen: - """ - Module that performs the cDNA synthesis. - """ + """Perform the cDNA synthesis.""" - def __init__(self, ifasta: str, igtf: str, icpn: str, ofasta: str, ocsv: str): + # pylint: disable=R0913 + def __init__( + self, ifasta: str, igtf: str, icpn: str, ofasta: str, ocsv: str + ): + """Initialise function.""" # inputs self.fasta = ifasta self.gtf = igtf @@ -58,15 +61,15 @@ class CDNAGen: self.output_csv = ocsv # variables - self.csv_df = None - self.fasta_dict = None - self.fasta_records = None - self.gtf_df = None + self.csv_df = pd.DataFrame() + self.fasta_dict: Dict[str, Any] = {} + self.fasta_records: List[SeqRecord] = [] + self.gtf_df = pd.DataFrame() self.run() def run(self) -> None: - """ - Executes the cDNA workflow. + """Execute the cDNA workflow. + Returns: None """ @@ -80,7 +83,7 @@ class CDNAGen: self.write_csv() def add_records(self) -> None: - """Adds data records to fasta file. + """Add data records to fasta file. Adds the copy number information to the fasta records. @@ -88,7 +91,7 @@ class CDNAGen: """ self.fasta_records = [] - for index, row in self.gtf_df.iterrows(): + for _, row in self.gtf_df.iterrows(): if row["complement"] is not None: copy_number = row["Transcript_Copy_Number"] for _ in range(int(copy_number)): @@ -101,8 +104,8 @@ class CDNAGen: self.fasta_records.append(record) def add_sequences(self) -> None: - """ - Adds the sequence for a given priming site. + """Add the sequence for a given priming site. + Returns: None """ @@ -112,17 +115,17 @@ class CDNAGen: ) def add_complement(self) -> None: - """ - Adds the complementary cDNA sequence. + """Add the complementary cDNA sequence. + Returns: None """ self.gtf_df["complement"] = self.gtf_df["priming_site"].apply( - lambda x: seq_complement(x) - ) + seq_complement + ) def read_primingsite(self, sequence: str, end: int) -> None: - """Read a fasta file from a given start character + """Read a fasta file from a given start character. Reads a fasta sequence with ID (sequence) and returns the sequence starting from the index start. @@ -151,7 +154,7 @@ class CDNAGen: self.fasta_dict = {x.name: x for x in records} def read_csv(self) -> None: - """Reads a given copy number csv file + """Read a given copy number csv file. Wrapper for Pandas read_csv. @@ -159,24 +162,27 @@ class CDNAGen: """ df_csv = pd.read_csv(self.cpn, index_col=False) - df_csv = df_csv.reset_index() # make sure indexes pair with number of rows + df_csv = df_csv.reset_index() # make sure indexes pair with number of rows # noqa: E501 self.csv_df = df_csv def read_gtf(self) -> None: """Read and process the GTF file. - Reads a GTF file and determines copy numbers from normalized probabilities. + Reads a GTF file and determines copy numbers from \ + normalized probabilities. Returns: None """ - # returns GTF with essential columns such as "feature", "seqname", "start", "end" - # alongside the names of any optional keys which appeared in the attribute column + # returns GTF with essential columns such as \ + # "feature", "seqname", "start", "end" + # alongside the names of any optional keys \ + # which appeared in the attribute column gtf_df = read_gtf(self.gtf) gtf_df["Binding_Probability"] = pd.to_numeric( gtf_df["Binding_Probability"] ) # convert to numeric - df_normalization_bind_probablility = gtf_df.groupby("seqname")[ + df_norm_bind_prob = gtf_df.groupby("seqname")[ "Binding_Probability" ].sum() # extract binding probability count = 0 @@ -194,7 +200,7 @@ class CDNAGen: id_csv = str(row["seqname"]).split("_")[1] # Calculate Normalized_Binding_Probability and add to GTF dataframe gtf_df.loc[index, "Normalized_Binding_Probability"] = ( - row["Binding_Probability"] / df_normalization_bind_probablility[id_] + row["Binding_Probability"] / df_norm_bind_prob[id_] ) # Calculate Normalized_Binding_Probability and add to GTF dataframe csv_transcript_copy_number = self.csv_df.loc[ @@ -211,7 +217,7 @@ class CDNAGen: self.gtf_df = gtf_df def write_fasta(self) -> None: - """Writes cDNA fasta records to file. + """Write cDNA fasta records to file. Wrapper for SeqIO.write. @@ -222,14 +228,14 @@ class CDNAGen: print(f"Fasta file successfully written to: {self.output_fasta}") def write_csv(self) -> None: - """Writes the copy number information to a csv file. + """Write the copy number information to a csv file. Wrapper for Pandas to_csv. Returns: None """ - self.gtf_df[["cdna_ID", "Transcript_Copy_Number"]].to_csv( - self.output_csv, index=False - ) - print(f"Copy number csv file successfully written to: {self.output_csv}") + df_to_save = self.gtf_df[["cdna_ID", "Transcript_Copy_Number"]] + df_to_save.to_csv(self.output_csv, index=False) + print(f"Copy number csv file successfully written to: \ + {self.output_csv}") diff --git a/cdna/cli.py b/cdna/cli.py index 86f1babf669c3aeadb136922d036ca442eeef8a1..ce568d8548cc1e058c77204f90c600b0f1fcb17b 100644 --- a/cdna/cli.py +++ b/cdna/cli.py @@ -1,15 +1,17 @@ +"""Receive command line arguments.""" + import argparse import logging -from cdna import CDNAGen +from cdna.cdna import CDNAGen -def cdna_parser() -> None: - """Parser for cDNA generator +def cdna_parser() -> CDNAGen: + """Parse sequences for cDNA generator. Parses command line arguments for cDNA generation. - Returns: None + Returns: CDNAGen instance """ parser = argparse.ArgumentParser( @@ -48,7 +50,8 @@ def cdna_parser() -> None: if __name__ == "__main__": logging.basicConfig( - format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")', + format='[%(asctime)s: %(levelname)s] %(message)s \ + (module "%(module)s")', level=logging.INFO, ) LOG = logging.getLogger(__name__) diff --git a/requirements-dev.txt b/requirements_dev.txt similarity index 72% rename from requirements-dev.txt rename to requirements_dev.txt index a511aad9d7b7d9ab2e6d885ba48915c49d907b28..4360923ac5b030fc34743142d9a850b830995e7d 100644 --- a/requirements-dev.txt +++ b/requirements_dev.txt @@ -1,6 +1,5 @@ -gtfparse -biopython pytest +coverage black flake8 flake8-docstrings diff --git a/tests/test_cdna.py b/tests/test_cdna.py index 5ad8caa2b55d5159d1e2a90591a8d07f2ab5cdb1..102f4238186c483d1f71c487e4820bb27e9b750f 100644 --- a/tests/test_cdna.py +++ b/tests/test_cdna.py @@ -1,19 +1,25 @@ -# imports +"""Tests for cDNA functions.""" import pytest from cdna.cdna import complement, seq_complement + @pytest.mark.parametrize( "test_input,expected", [("A", "T")] ) -def test_complement_param(test_input, expected): # we need to pass the lists to the test function... +# we need to pass the lists to the test function... +def test_complement_param(test_input, expected): + """Test complement() function.""" assert complement(test_input) == expected + @pytest.mark.parametrize( "test_input,expected", [("AA", "TT")] ) -def test_seq_complement_param(test_input, expected): # we need to pass the lists to the test function... +# we need to pass the lists to the test function... +def test_seq_complement_param(test_input, expected): + """Test seq_complement() function.""" assert seq_complement(test_input) == expected @@ -23,13 +29,16 @@ def test_seq_complement_param(test_input, expected): # we need to pass the list [(1, ValueError)] ) def test_complement_param_failing(test_input, expected): + """Test complement() fail function.""" with pytest.raises(expected): complement(test_input) + @pytest.mark.parametrize( "test_input,expected", [("11", ValueError)] ) -def test_complement_param_failing(test_input, expected): +def test_seq_complement_param_failing(test_input, expected): + """Test seq_complement() fail function.""" with pytest.raises(expected): seq_complement(test_input)