From c30df085d5aa27ad45b19bf6f1b68bc44db22a2c Mon Sep 17 00:00:00 2001
From: Mate Balajti <mate.balajti@unibas.ch>
Date: Sun, 13 Aug 2023 21:27:37 +0000
Subject: [PATCH] feat: add logging and update setup.py

---
 cdna/cdna.py                                  | 18 +++++++---
 cdna/cli.py                                   | 30 ++++++++---------
 exampleInput.sh                               |  4 ---
 setup.py                                      | 33 +++++++++++++------
 .../transcript-checkpoint.fasta               |  8 -----
 {test_files => tests/test_files}/.gitkeep     |  0
 .../test_files}/Example_GTF_Input.GTF         |  0
 {test_files => tests/test_files}/cDNA.csv     |  0
 {test_files => tests/test_files}/cDNA.fasta   |  0
 .../test_files}/copy_number_file.csv          |  0
 .../test_files}/copy_number_input.csv         |  0
 .../test_files}/transcript.fasta              |  0
 .../test_files}/yeast_example.fa              |  0
 13 files changed, 49 insertions(+), 44 deletions(-)
 delete mode 100644 exampleInput.sh
 delete mode 100644 test_files/.ipynb_checkpoints/transcript-checkpoint.fasta
 rename {test_files => tests/test_files}/.gitkeep (100%)
 rename {test_files => tests/test_files}/Example_GTF_Input.GTF (100%)
 rename {test_files => tests/test_files}/cDNA.csv (100%)
 rename {test_files => tests/test_files}/cDNA.fasta (100%)
 rename {test_files => tests/test_files}/copy_number_file.csv (100%)
 rename {test_files => tests/test_files}/copy_number_input.csv (100%)
 rename {test_files => tests/test_files}/transcript.fasta (100%)
 rename {test_files => tests/test_files}/yeast_example.fa (100%)

diff --git a/cdna/cdna.py b/cdna/cdna.py
index 7593fb0..db41389 100644
--- a/cdna/cdna.py
+++ b/cdna/cdna.py
@@ -1,5 +1,6 @@
 """cDNA generator."""
 import warnings
+import logging
 from typing import Optional, List, Dict, Any
 import pandas as pd  # type: ignore
 from Bio import SeqIO  # type: ignore
@@ -7,6 +8,8 @@ from Bio.Seq import Seq  # type: ignore
 from Bio.SeqRecord import SeqRecord  # type: ignore
 from gtfparse import read_gtf  # type: ignore
 
+LOG = logging.getLogger(__name__)
+
 # ignore warnings from read_gtf
 warnings.filterwarnings(action="ignore", category=FutureWarning)
 
@@ -23,7 +26,7 @@ def complement(res: str) -> str:
     """
     translate_dict = {"A": "T", "T": "A", "U": "A", "G": "C", "C": "G"}
     if res not in translate_dict:
-        print(f"Unknown character, {res}")
+        LOG.warning("Unknown character, %s", res)
         raise ValueError
     return translate_dict[res]
 
@@ -40,7 +43,9 @@ def seq_complement(sequence: str) -> Optional[str]:
     """
     if sequence is None:
         return None
-    _ = "".join([complement(char) for char in str(sequence)])[::-1]  # reverse string # noqa: E501
+    _ = "".join([
+        complement(char) for char in str(sequence)
+        ])[::-1]  # reverse string
     return _
 
 
@@ -179,6 +184,9 @@ class CDNAGen:
         # alongside the names of any optional keys \
         # which appeared in the attribute column
         gtf_df = read_gtf(self.gtf)
+
+        gtf_df = gtf_df.to_pandas()  # convert polars df to pandas df
+
         gtf_df["Binding_Probability"] = pd.to_numeric(
             gtf_df["Binding_Probability"]
         )  # convert to numeric
@@ -225,7 +233,7 @@ class CDNAGen:
 
         """
         SeqIO.write(self.fasta_records, self.output_fasta, "fasta")
-        print(f"Fasta file successfully written to: {self.output_fasta}")
+        LOG.info("Fasta file successfully written to: %s", self.output_fasta)
 
     def write_csv(self) -> None:
         """Write the copy number information to a csv file.
@@ -237,5 +245,5 @@ class CDNAGen:
         """
         df_to_save = self.gtf_df[["cdna_ID", "Transcript_Copy_Number"]]
         df_to_save.to_csv(self.output_csv, index=False)
-        print(f"Copy number csv file successfully written to: \
-              {self.output_csv}")
+        LOG.info("Copy number csv file successfully written to: %s",
+                 self.output_csv)
diff --git a/cdna/cli.py b/cdna/cli.py
index ce568d8..5416b78 100644
--- a/cdna/cli.py
+++ b/cdna/cli.py
@@ -3,10 +3,17 @@
 import argparse
 import logging
 
-from cdna.cdna import CDNAGen
+logging.basicConfig(
+    format='[%(asctime)s: %(levelname)s] %(message)s '
+           '(module "%(module)s")',
+    level=logging.INFO,
+)
+LOG = logging.getLogger(__name__)
 
+from cdna.cdna import CDNAGen  # noqa: E402,E501 # pylint:disable=wrong-import-position
 
-def cdna_parser() -> CDNAGen:
+
+def main():
     """Parse sequences for cDNA generator.
 
     Parses command line arguments for cDNA generation.
@@ -35,27 +42,16 @@ def cdna_parser() -> CDNAGen:
         "-ocsv", "--output_csv", help="output fasta file", required=True
     )
     args = parser.parse_args()
-    #  Print parser arguments
-    print(" \n".join(f"{k}={v}" for k, v in vars(args).items()))
-    print()
-    cdna_inst = CDNAGen(
+
+    LOG.info("Running cDNA generator...")
+    CDNAGen(
         ifasta=args.input_fasta,
         igtf=args.input_gtf,
         icpn=args.input_copy_number,
         ocsv=args.output_csv,
         ofasta=args.output_fasta,
     )
-    return cdna_inst
 
 
 if __name__ == "__main__":
-    logging.basicConfig(
-        format='[%(asctime)s: %(levelname)s] %(message)s \
-            (module "%(module)s")',
-        level=logging.INFO,
-    )
-    LOG = logging.getLogger(__name__)
-    print("**********************")
-    print("Running cDNA generator")
-    print("**********************")
-    cdna_parser()
+    main()
diff --git a/exampleInput.sh b/exampleInput.sh
deleted file mode 100644
index 547236c..0000000
--- a/exampleInput.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-python cdna/cli.py -ifa test_files/yeast_example.fa \
- -icpn test_files/copy_number_input.csv \
- -igt test_files/Example_GTF_Input.GTF \
- -ofa test_files/cDNA.fasta -ocsv test_files/cDNA.csv
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 73e2bfd..d57b413 100644
--- a/setup.py
+++ b/setup.py
@@ -1,16 +1,29 @@
+"""Set up project."""
+from pathlib import Path
 from setuptools import setup, find_packages
 
-with open('requirements.txt') as f:
-    required = f.read().splitlines()
+project_root_dir = Path(__file__).parent.resolve()
+
+with open(project_root_dir / "requirements.txt",
+          "r", encoding="utf-8") as f:
+    INSTALL_REQUIRED = f.read().splitlines()
+
+URL = ('https://git.scicore.unibas.ch/zavolan_group/'
+       'tools/cdna-generator')
 
 setup(
-    name='cdna',
-    url='https://gitlab.com/my_user_name/my_package.git',
-    author='My Name',
-    author_email='me@email.org',
-    description='Brief package description',
+    name='cdna-generator',
+    version='0.1.1',
+    url=URL,
     license='MIT',
-    version='1.0.0',
-    packages=find_packages(),  # this will autodetect Python packages from the directory tree, e.g., in `code/`
-    install_requires=required,  # add here packages that are required for your package to run, including version or range of versions
+    author='Eric Boittier, Bastian Wagner, Quentin Badolle',
+    author_email='me@email.org',
+    description='cDNA generator',
+    packages=find_packages(),
+    install_requires=INSTALL_REQUIRED,  # runtime deps from requirements.txt
+    entry_points={
+        'console_scripts': [
+            'cdna-generator=cdna.cli:main'
+            ]
+        }
 )
diff --git a/test_files/.ipynb_checkpoints/transcript-checkpoint.fasta b/test_files/.ipynb_checkpoints/transcript-checkpoint.fasta
deleted file mode 100644
index bb37ee2..0000000
--- a/test_files/.ipynb_checkpoints/transcript-checkpoint.fasta
+++ /dev/null
@@ -1,8 +0,0 @@
->1
-GAUAGCUAGAGGAUUCUCAGAGGAGAAGCUAGAGGAGCUAGAGGAGCUAGAGGAGCUAGAGGAGCUAGAGG
->2
-AGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAGCUAGAGGAGCUAGAGGAGCUAGAGG
->3
-AGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAGCUAGAGG
->4
-AGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAGCUAGAGGAGCUAGAGG
diff --git a/test_files/.gitkeep b/tests/test_files/.gitkeep
similarity index 100%
rename from test_files/.gitkeep
rename to tests/test_files/.gitkeep
diff --git a/test_files/Example_GTF_Input.GTF b/tests/test_files/Example_GTF_Input.GTF
similarity index 100%
rename from test_files/Example_GTF_Input.GTF
rename to tests/test_files/Example_GTF_Input.GTF
diff --git a/test_files/cDNA.csv b/tests/test_files/cDNA.csv
similarity index 100%
rename from test_files/cDNA.csv
rename to tests/test_files/cDNA.csv
diff --git a/test_files/cDNA.fasta b/tests/test_files/cDNA.fasta
similarity index 100%
rename from test_files/cDNA.fasta
rename to tests/test_files/cDNA.fasta
diff --git a/test_files/copy_number_file.csv b/tests/test_files/copy_number_file.csv
similarity index 100%
rename from test_files/copy_number_file.csv
rename to tests/test_files/copy_number_file.csv
diff --git a/test_files/copy_number_input.csv b/tests/test_files/copy_number_input.csv
similarity index 100%
rename from test_files/copy_number_input.csv
rename to tests/test_files/copy_number_input.csv
diff --git a/test_files/transcript.fasta b/tests/test_files/transcript.fasta
similarity index 100%
rename from test_files/transcript.fasta
rename to tests/test_files/transcript.fasta
diff --git a/test_files/yeast_example.fa b/tests/test_files/yeast_example.fa
similarity index 100%
rename from test_files/yeast_example.fa
rename to tests/test_files/yeast_example.fa
-- 
GitLab