diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d0ca3580ed585f2fccb7c99b6b6d3890fd63e154..3ff981c274b7db202d782caa50f2935d6843ed6c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -20,7 +20,7 @@ unit-test-job: # This job runs in the test stage. - pip install -r requirements.txt - pip install -r requirements_dev.txt - pip install -e . - - coverage run --source term_frag_sel -m pytest + - coverage run --source cdna -m pytest - coverage report -m lint-test-job: # This job also runs in the test stage. @@ -29,6 +29,6 @@ lint-test-job: # This job also runs in the test stage. - pip install -r requirements.txt - pip install -r requirements_dev.txt - pip install -e . - - flake8 --docstring-convention google term_frag_sel/ tests/ - - pylint term_frag_sel/ tests/ - - mypy term_frag_sel/ \ No newline at end of file + - flake8 --docstring-convention google cdna/ tests/ + - pylint cdna/ tests/ + - mypy cdna/ \ No newline at end of file diff --git a/cdna/__init__.py b/cdna/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..bb7d5f3186cb2e516714c06c9a5e8d2b57696586 100644 --- a/cdna/__init__.py +++ b/cdna/__init__.py @@ -0,0 +1 @@ +"""Initialise package.""" diff --git a/cdna/cdna.py b/cdna/cdna.py index d8acd5f05c684854327b61db125edc9b168c3d9a..e6c5720ed4996edd5c785fbd22638a8c61376738 100644 --- a/cdna/cdna.py +++ b/cdna/cdna.py @@ -1,5 +1,5 @@ +"""cDNA generator.""" import warnings - import pandas as pd from Bio import SeqIO from Bio.Seq import Seq @@ -11,8 +11,8 @@ warnings.filterwarnings(action="ignore", category=FutureWarning) def complement(res: str) -> str: - """ - Returns the cDNA complement of a given base pair + """Return the cDNA complement of a given base pair. + Args: res: residue code. @@ -28,9 +28,8 @@ def complement(res: str) -> str: def seq_complement(sequence: str) -> str or None: - """ - Returns the corresponding cDNA sequence by finding the complementary - base pairs and returning the reversed sequence. + """Return the corresponding cDNA sequence by finding the complementary \ + base pairs and returning the reversed sequence. Args: sequence: sequence to be converted into cDNA. @@ -40,16 +39,17 @@ def seq_complement(sequence: str) -> str or None: """ if sequence is None: return None - _ = "".join([complement(char) for char in str(sequence)])[::-1] # reverse string + _ = "".join([complement(char) for char in str(sequence)])[::-1] # reverse string # noqa: E501 return _ class CDNAGen: - """ - Module that performs the cDNA synthesis. - """ + """Perform the cDNA synthesis.""" - def __init__(self, ifasta: str, igtf: str, icpn: str, ofasta: str, ocsv: str): + def __init__( + self, ifasta: str, igtf: str, icpn: str, ofasta: str, ocsv: str + ): + """Initialise function.""" # inputs self.fasta = ifasta self.gtf = igtf @@ -65,8 +65,8 @@ class CDNAGen: self.run() def run(self) -> None: - """ - Executes the cDNA workflow. + """Execute the cDNA workflow. + Returns: None """ @@ -80,7 +80,7 @@ class CDNAGen: self.write_csv() def add_records(self) -> None: - """Adds data records to fasta file. + """Add data records to fasta file. Adds the copy number information to the fasta records. @@ -88,7 +88,7 @@ class CDNAGen: """ self.fasta_records = [] - for index, row in self.gtf_df.iterrows(): + for _, row in self.gtf_df.iterrows(): if row["complement"] is not None: copy_number = row["Transcript_Copy_Number"] for _ in range(int(copy_number)): @@ -101,8 +101,8 @@ class CDNAGen: self.fasta_records.append(record) def add_sequences(self) -> None: - """ - Adds the sequence for a given priming site. + """Add the sequence for a given priming site. + Returns: None """ @@ -112,8 +112,8 @@ class CDNAGen: ) def add_complement(self) -> None: - """ - Adds the complementary cDNA sequence. + """Add the complementary cDNA sequence. + Returns: None """ @@ -122,7 +122,7 @@ class CDNAGen: ) def read_primingsite(self, sequence: str, end: int) -> None: - """Read a fasta file from a given start character + """Read a fasta file from a given start character. Reads a fasta sequence with ID (sequence) and returns the sequence starting from the index start. @@ -151,7 +151,7 @@ class CDNAGen: self.fasta_dict = {x.name: x for x in records} def read_csv(self) -> None: - """Reads a given copy number csv file + """Read a given copy number csv file. Wrapper for Pandas read_csv. @@ -159,24 +159,27 @@ class CDNAGen: """ df_csv = pd.read_csv(self.cpn, index_col=False) - df_csv = df_csv.reset_index() # make sure indexes pair with number of rows + df_csv = df_csv.reset_index() # make sure indexes pair with number of rows # noqa: E501 self.csv_df = df_csv def read_gtf(self) -> None: """Read and process the GTF file. - Reads a GTF file and determines copy numbers from normalized probabilities. + Reads a GTF file and determines copy numbers from \ + normalized probabilities. Returns: None """ - # returns GTF with essential columns such as "feature", "seqname", "start", "end" - # alongside the names of any optional keys which appeared in the attribute column + # returns GTF with essential columns such as \ + # "feature", "seqname", "start", "end" + # alongside the names of any optional keys \ + # which appeared in the attribute column gtf_df = read_gtf(self.gtf) gtf_df["Binding_Probability"] = pd.to_numeric( gtf_df["Binding_Probability"] ) # convert to numeric - df_normalization_bind_probablility = gtf_df.groupby("seqname")[ + df_norm_bind_prob = gtf_df.groupby("seqname")[ "Binding_Probability" ].sum() # extract binding probability count = 0 @@ -194,7 +197,7 @@ class CDNAGen: id_csv = str(row["seqname"]).split("_")[1] # Calculate Normalized_Binding_Probability and add to GTF dataframe gtf_df.loc[index, "Normalized_Binding_Probability"] = ( - row["Binding_Probability"] / df_normalization_bind_probablility[id_] + row["Binding_Probability"] / df_norm_bind_prob[id_] ) # Calculate Normalized_Binding_Probability and add to GTF dataframe csv_transcript_copy_number = self.csv_df.loc[ @@ -211,7 +214,7 @@ class CDNAGen: self.gtf_df = gtf_df def write_fasta(self) -> None: - """Writes cDNA fasta records to file. + """Write cDNA fasta records to file. Wrapper for SeqIO.write. @@ -222,7 +225,7 @@ class CDNAGen: print(f"Fasta file successfully written to: {self.output_fasta}") def write_csv(self) -> None: - """Writes the copy number information to a csv file. + """Write the copy number information to a csv file. Wrapper for Pandas to_csv. @@ -232,4 +235,5 @@ class CDNAGen: self.gtf_df[["cdna_ID", "Transcript_Copy_Number"]].to_csv( self.output_csv, index=False ) - print(f"Copy number csv file successfully written to: {self.output_csv}") + print(f"Copy number csv file successfully written to: \ + {self.output_csv}")