From 0db28afa685a4047050960946baad1aa251dbffc Mon Sep 17 00:00:00 2001
From: Eric Boittier <ericdavid.boittier@unibas.ch>
Date: Mon, 12 Dec 2022 15:37:25 +0100
Subject: [PATCH] setup.py uses requirements.txt

---
 Dockerfile       |  10 +++++
 cdna/cdna.py     | 112 +++++++++++++++++++++++++++++++++++++----------
 requirements.txt |   3 +-
 setup.py         |   5 ++-
 4 files changed, 104 insertions(+), 26 deletions(-)
 create mode 100644 Dockerfile

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..e71e517
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,10 @@
+FROM python:3.8-slim-buster 
+
+MAINTAINER EricBoittier
+
+ENV PATH=$PATH:/usr/games/
+
+COPY . .
+
+
+
diff --git a/cdna/cdna.py b/cdna/cdna.py
index 4a1b912..c5f6d31 100644
--- a/cdna/cdna.py
+++ b/cdna/cdna.py
@@ -13,10 +13,12 @@ warnings.filterwarnings(action="ignore", category=FutureWarning)
 
 def compliment(res: str) -> str:
     """
-    Returns the compliment of a given DNA residue.
-    
-    :param res: DNA residue
-    :return:
+    Returns the cDNA complement of a given nucleotide residue.
+    Args:
+        res: residue code.
+
+    Returns: corresponding cDNA residue.
+
     """
     translate_dict = {"A": "T", "T": "A", "U": "A", "G": "C", "C": "G"}
     if res not in translate_dict.keys():
@@ -27,11 +29,14 @@ def compliment(res: str) -> str:
 
 def seq_compliment(sequence: str) -> str:
     """
-    Returns the corresponding cDNA sequence for a given input by finding the
-    corresponding compliment base pair and reversing the input.
+    Returns the corresponding cDNA sequence by finding the complementary
+    base for each residue and returning the reversed sequence.
+
+    Args:
+        sequence: sequence to be converted into cDNA.
+
+    Returns: corresponding cDNA sequence (the string "None" if sequence is None).
 
-    :param sequence: DNA sequence
-    :return: cDNA sequence
     """
     if sequence is None:
         return "None"
@@ -51,21 +56,26 @@ class CDNAGen:
         # variables
         self.fasta_dict = None
         self.fasta_records = None
-
+        self.df_input_GTF = None
         self.run()
 
-    def run(self):
+    def run(self) -> None:
+        """
+        Executes the cDNA workflow.
+        Returns: None
+
+        """
         self.read_csv()
         self.read_fasta()
         self.read_gtf()
         self.add_sequences()
         self.add_compliment()
         self.add_records()
-        print()
+        print()  # blank line for pretty printing
         self.write_fasta()
         self.write_csv()
 
-    def add_records(self):
+    def add_records(self) -> None:
         self.fasta_records = []
         for index, row in self.df_input_GTF.iterrows():
             if row["compliment"] is not None:
@@ -78,36 +88,79 @@ class CDNAGen:
                 )
                 self.fasta_records.append(record)
 
-    def add_sequences(self):
+    def add_sequences(self) -> None:
+        """
+        Adds the sequence for a given priming site.
+        Returns: None
+
+        """
         self.df_input_GTF["priming_site"] = self.df_input_GTF.apply(
             lambda row: self.read_primingsite(row["seqname"], row["start"]),
             axis=1,
         )
 
-    def add_compliment(self):
+    def add_compliment(self) -> None:
+        """
+        Adds the complementary cDNA sequence.
+        Returns: None
+
+        """
         self.df_input_GTF["compliment"] = self.df_input_GTF["priming_site"].apply(
             lambda x: seq_compliment(x)
         )
 
-    def read_primingsite(self, sequence, start):
+    def read_primingsite(self, sequence: str, start: int) -> None:
+        """Return the loaded fasta sequence for an ID, starting at a given index.
+
+        Reads a fasta sequence with ID (sequence) and returns the
+        sequence starting from the index start.
+
+        Args:
+            sequence: sequence ID to be read.
+            start: start of the sequence.
+
+        Returns: the sequence from index start onward, or None if the ID is unknown.
+
+        """
         if sequence not in self.fasta_dict.keys():
             return None
         _ = self.fasta_dict[sequence].seq[start:]
         return _
 
-    def read_fasta(self):
+    def read_fasta(self) -> None:
+        """Read a given fasta file.
+
+        Wrapper for SeqIO.parse.
+
+        Returns: None
+
+        """
         record = SeqIO.parse(self.fasta, "fasta")
         records = list(record)
         self.fasta_dict = {x.name: x for x in records}
 
-    def read_csv(self):
+    def read_csv(self) -> None:
+        """Reads a given copy number csv file.
+
+        Wrapper for Pandas read_csv.
+
+        Returns: None
+
+        """
         df_input_CSV = pd.read_csv(self.cpn, index_col=False)
         df_input_CSV = (
             df_input_CSV.reset_index()
         )  # make sure indexes pair with number of rows
         self.df_input_CSV = df_input_CSV
 
-    def read_gtf(self):
+    def read_gtf(self) -> None:
+        """Read and process the GTF file.
+
+        Reads a GTF file and determines copy numbers from normalized probabilities.
+
+        Returns: None
+
+        """
         # returns GTF with essential columns such as "feature", "seqname", "start", "end"
         # alongside the names of any optional keys which appeared in the attribute column
         df_input_GTF = read_gtf(self.gtf)
@@ -132,7 +185,7 @@ class CDNAGen:
             id_CSV = str(row["seqname"]).split("_")[1]
             # Calculate Normalized_Binding_Probability and add to GTF dataframe
             df_input_GTF.loc[index, "Normalized_Binding_Probability"] = (
-                row["Binding_Probability"] / df_normalization_bind_probablility[id_GTF]
+                    row["Binding_Probability"] / df_normalization_bind_probablility[id_GTF]
             )
             # Calculate Normalized_Binding_Probability and add to GTF dataframe
             csv_transcript_copy_number = self.df_input_CSV.loc[
@@ -148,15 +201,26 @@ class CDNAGen:
 
         self.df_input_GTF = df_input_GTF
 
-    def write_fasta(self):
+    def write_fasta(self) -> None:
+        """Writes cDNA fasta records to file.
+
+        Wrapper for SeqIO.write.
+
+        Returns: None
+
+        """
         SeqIO.write(self.fasta_records, self.output_fasta, "fasta")
         print(f"Fasta file successfully written to: {self.output_fasta}")
 
-    def write_csv(self):
+    def write_csv(self) -> None:
+        """Writes the copy number information to a csv file.
+
+        Wrapper for Pandas to_csv.
+
+        Returns: None
+
+        """
         self.df_input_GTF[["cdna_ID", "Transcript_Copy_Number"]].to_csv(
             self.output_csv, index=False
         )
         print(f"Copy number csv file successfully written to: {self.output_csv}")
-
-
-
diff --git a/requirements.txt b/requirements.txt
index 3abd004..d9abb6b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 gtfparse
-biopython
\ No newline at end of file
+biopython
+pandas
diff --git a/setup.py b/setup.py
index 96ecc09..73e2bfd 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,8 @@
 from setuptools import setup, find_packages
 
+with open('requirements.txt') as f:
+    required = f.read().splitlines()
+
 setup(
     name='cdna',
     url='https://gitlab.com/my_user_name/my_package.git',
@@ -9,5 +12,5 @@ setup(
     license='MIT',
     version='1.0.0',
     packages=find_packages(),  # this will autodetect Python packages from the directory tree, e.g., in `code/`
-    install_requires=[],  # add here packages that are required for your package to run, including version or range of versions
+    install_requires=required,  # runtime dependencies, read from requirements.txt above
 )
-- 
GitLab