Skip to content
Snippets Groups Projects
Commit 572246f7 authored by Mate Balajti
Browse files

feat: add lint job to CI

parent 7fd08564
No related branches found
No related tags found
1 merge request: !6 "feat: add testing and linting to CI workflow"
...@@ -20,7 +20,7 @@ unit-test-job: # This job runs in the test stage. ...@@ -20,7 +20,7 @@ unit-test-job: # This job runs in the test stage.
- pip install -r requirements.txt - pip install -r requirements.txt
- pip install -r requirements_dev.txt - pip install -r requirements_dev.txt
- pip install -e . - pip install -e .
- coverage run --source term_frag_sel -m pytest - coverage run --source cdna -m pytest
- coverage report -m - coverage report -m
lint-test-job: # This job also runs in the test stage. lint-test-job: # This job also runs in the test stage.
...@@ -29,6 +29,6 @@ lint-test-job: # This job also runs in the test stage. ...@@ -29,6 +29,6 @@ lint-test-job: # This job also runs in the test stage.
- pip install -r requirements.txt - pip install -r requirements.txt
- pip install -r requirements_dev.txt - pip install -r requirements_dev.txt
- pip install -e . - pip install -e .
- flake8 --docstring-convention google term_frag_sel/ tests/ - flake8 --docstring-convention google cdna/ tests/
- pylint term_frag_sel/ tests/ - pylint cdna/ tests/
- mypy term_frag_sel/ - mypy cdna/
\ No newline at end of file \ No newline at end of file
"""Initialise package."""
"""cDNA generator."""
import warnings import warnings
import pandas as pd import pandas as pd
from Bio import SeqIO from Bio import SeqIO
from Bio.Seq import Seq from Bio.Seq import Seq
...@@ -11,8 +11,8 @@ warnings.filterwarnings(action="ignore", category=FutureWarning) ...@@ -11,8 +11,8 @@ warnings.filterwarnings(action="ignore", category=FutureWarning)
def complement(res: str) -> str: def complement(res: str) -> str:
""" """Return the cDNA complement of a given base pair.
Returns the cDNA complement of a given base pair
Args: Args:
res: residue code. res: residue code.
...@@ -28,9 +28,8 @@ def complement(res: str) -> str: ...@@ -28,9 +28,8 @@ def complement(res: str) -> str:
def seq_complement(sequence: str) -> str or None: def seq_complement(sequence: str) -> str or None:
""" """Return the corresponding cDNA sequence by finding the complementary \
Returns the corresponding cDNA sequence by finding the complementary base pairs and returning the reversed sequence.
base pairs and returning the reversed sequence.
Args: Args:
sequence: sequence to be converted into cDNA. sequence: sequence to be converted into cDNA.
...@@ -40,16 +39,17 @@ def seq_complement(sequence: str) -> str or None: ...@@ -40,16 +39,17 @@ def seq_complement(sequence: str) -> str or None:
""" """
if sequence is None: if sequence is None:
return None return None
_ = "".join([complement(char) for char in str(sequence)])[::-1] # reverse string _ = "".join([complement(char) for char in str(sequence)])[::-1] # reverse string # noqa: E501
return _ return _
class CDNAGen: class CDNAGen:
""" """Perform the cDNA synthesis."""
Module that performs the cDNA synthesis.
"""
def __init__(self, ifasta: str, igtf: str, icpn: str, ofasta: str, ocsv: str): def __init__(
self, ifasta: str, igtf: str, icpn: str, ofasta: str, ocsv: str
):
"""Initialise function."""
# inputs # inputs
self.fasta = ifasta self.fasta = ifasta
self.gtf = igtf self.gtf = igtf
...@@ -65,8 +65,8 @@ class CDNAGen: ...@@ -65,8 +65,8 @@ class CDNAGen:
self.run() self.run()
def run(self) -> None: def run(self) -> None:
""" """Execute the cDNA workflow.
Executes the cDNA workflow.
Returns: None Returns: None
""" """
...@@ -80,7 +80,7 @@ class CDNAGen: ...@@ -80,7 +80,7 @@ class CDNAGen:
self.write_csv() self.write_csv()
def add_records(self) -> None: def add_records(self) -> None:
"""Adds data records to fasta file. """Add data records to fasta file.
Adds the copy number information to the fasta records. Adds the copy number information to the fasta records.
...@@ -88,7 +88,7 @@ class CDNAGen: ...@@ -88,7 +88,7 @@ class CDNAGen:
""" """
self.fasta_records = [] self.fasta_records = []
for index, row in self.gtf_df.iterrows(): for _, row in self.gtf_df.iterrows():
if row["complement"] is not None: if row["complement"] is not None:
copy_number = row["Transcript_Copy_Number"] copy_number = row["Transcript_Copy_Number"]
for _ in range(int(copy_number)): for _ in range(int(copy_number)):
...@@ -101,8 +101,8 @@ class CDNAGen: ...@@ -101,8 +101,8 @@ class CDNAGen:
self.fasta_records.append(record) self.fasta_records.append(record)
def add_sequences(self) -> None: def add_sequences(self) -> None:
""" """Add the sequence for a given priming site.
Adds the sequence for a given priming site.
Returns: None Returns: None
""" """
...@@ -112,8 +112,8 @@ class CDNAGen: ...@@ -112,8 +112,8 @@ class CDNAGen:
) )
def add_complement(self) -> None: def add_complement(self) -> None:
""" """Add the complementary cDNA sequence.
Adds the complementary cDNA sequence.
Returns: None Returns: None
""" """
...@@ -122,7 +122,7 @@ class CDNAGen: ...@@ -122,7 +122,7 @@ class CDNAGen:
) )
def read_primingsite(self, sequence: str, end: int) -> None: def read_primingsite(self, sequence: str, end: int) -> None:
"""Read a fasta file from a given start character """Read a fasta file from a given start character.
Reads a fasta sequence with ID (sequence) and returns the Reads a fasta sequence with ID (sequence) and returns the
sequence starting from the index start. sequence starting from the index start.
...@@ -151,7 +151,7 @@ class CDNAGen: ...@@ -151,7 +151,7 @@ class CDNAGen:
self.fasta_dict = {x.name: x for x in records} self.fasta_dict = {x.name: x for x in records}
def read_csv(self) -> None: def read_csv(self) -> None:
"""Reads a given copy number csv file """Read a given copy number csv file.
Wrapper for Pandas read_csv. Wrapper for Pandas read_csv.
...@@ -159,24 +159,27 @@ class CDNAGen: ...@@ -159,24 +159,27 @@ class CDNAGen:
""" """
df_csv = pd.read_csv(self.cpn, index_col=False) df_csv = pd.read_csv(self.cpn, index_col=False)
df_csv = df_csv.reset_index() # make sure indexes pair with number of rows df_csv = df_csv.reset_index() # make sure indexes pair with number of rows # noqa: E501
self.csv_df = df_csv self.csv_df = df_csv
def read_gtf(self) -> None: def read_gtf(self) -> None:
"""Read and process the GTF file. """Read and process the GTF file.
Reads a GTF file and determines copy numbers from normalized probabilities. Reads a GTF file and determines copy numbers from \
normalized probabilities.
Returns: None Returns: None
""" """
# returns GTF with essential columns such as "feature", "seqname", "start", "end" # returns GTF with essential columns such as \
# alongside the names of any optional keys which appeared in the attribute column # "feature", "seqname", "start", "end"
# alongside the names of any optional keys \
# which appeared in the attribute column
gtf_df = read_gtf(self.gtf) gtf_df = read_gtf(self.gtf)
gtf_df["Binding_Probability"] = pd.to_numeric( gtf_df["Binding_Probability"] = pd.to_numeric(
gtf_df["Binding_Probability"] gtf_df["Binding_Probability"]
) # convert to numeric ) # convert to numeric
df_normalization_bind_probablility = gtf_df.groupby("seqname")[ df_norm_bind_prob = gtf_df.groupby("seqname")[
"Binding_Probability" "Binding_Probability"
].sum() # extract binding probability ].sum() # extract binding probability
count = 0 count = 0
...@@ -194,7 +197,7 @@ class CDNAGen: ...@@ -194,7 +197,7 @@ class CDNAGen:
id_csv = str(row["seqname"]).split("_")[1] id_csv = str(row["seqname"]).split("_")[1]
# Calculate Normalized_Binding_Probability and add to GTF dataframe # Calculate Normalized_Binding_Probability and add to GTF dataframe
gtf_df.loc[index, "Normalized_Binding_Probability"] = ( gtf_df.loc[index, "Normalized_Binding_Probability"] = (
row["Binding_Probability"] / df_normalization_bind_probablility[id_] row["Binding_Probability"] / df_norm_bind_prob[id_]
) )
# Calculate Normalized_Binding_Probability and add to GTF dataframe # Calculate Normalized_Binding_Probability and add to GTF dataframe
csv_transcript_copy_number = self.csv_df.loc[ csv_transcript_copy_number = self.csv_df.loc[
...@@ -211,7 +214,7 @@ class CDNAGen: ...@@ -211,7 +214,7 @@ class CDNAGen:
self.gtf_df = gtf_df self.gtf_df = gtf_df
def write_fasta(self) -> None: def write_fasta(self) -> None:
"""Writes cDNA fasta records to file. """Write cDNA fasta records to file.
Wrapper for SeqIO.write. Wrapper for SeqIO.write.
...@@ -222,7 +225,7 @@ class CDNAGen: ...@@ -222,7 +225,7 @@ class CDNAGen:
print(f"Fasta file successfully written to: {self.output_fasta}") print(f"Fasta file successfully written to: {self.output_fasta}")
def write_csv(self) -> None: def write_csv(self) -> None:
"""Writes the copy number information to a csv file. """Write the copy number information to a csv file.
Wrapper for Pandas to_csv. Wrapper for Pandas to_csv.
...@@ -232,4 +235,5 @@ class CDNAGen: ...@@ -232,4 +235,5 @@ class CDNAGen:
self.gtf_df[["cdna_ID", "Transcript_Copy_Number"]].to_csv( self.gtf_df[["cdna_ID", "Transcript_Copy_Number"]].to_csv(
self.output_csv, index=False self.output_csv, index=False
) )
print(f"Copy number csv file successfully written to: {self.output_csv}") print(f"Copy number csv file successfully written to: \
{self.output_csv}")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment