From c30df085d5aa27ad45b19bf6f1b68bc44db22a2c Mon Sep 17 00:00:00 2001 From: Mate Balajti <mate.balajti@unibas.ch> Date: Sun, 13 Aug 2023 21:27:37 +0000 Subject: [PATCH] feat: add logging and update setup.py --- cdna/cdna.py | 18 +++++++--- cdna/cli.py | 30 ++++++++--------- exampleInput.sh | 4 --- setup.py | 33 +++++++++++++------ .../transcript-checkpoint.fasta | 8 ----- {test_files => tests/test_files}/.gitkeep | 0 .../test_files}/Example_GTF_Input.GTF | 0 {test_files => tests/test_files}/cDNA.csv | 0 {test_files => tests/test_files}/cDNA.fasta | 0 .../test_files}/copy_number_file.csv | 0 .../test_files}/copy_number_input.csv | 0 .../test_files}/transcript.fasta | 0 .../test_files}/yeast_example.fa | 0 13 files changed, 49 insertions(+), 44 deletions(-) delete mode 100644 exampleInput.sh delete mode 100644 test_files/.ipynb_checkpoints/transcript-checkpoint.fasta rename {test_files => tests/test_files}/.gitkeep (100%) rename {test_files => tests/test_files}/Example_GTF_Input.GTF (100%) rename {test_files => tests/test_files}/cDNA.csv (100%) rename {test_files => tests/test_files}/cDNA.fasta (100%) rename {test_files => tests/test_files}/copy_number_file.csv (100%) rename {test_files => tests/test_files}/copy_number_input.csv (100%) rename {test_files => tests/test_files}/transcript.fasta (100%) rename {test_files => tests/test_files}/yeast_example.fa (100%) diff --git a/cdna/cdna.py b/cdna/cdna.py index 7593fb0..db41389 100644 --- a/cdna/cdna.py +++ b/cdna/cdna.py @@ -1,5 +1,6 @@ """cDNA generator.""" import warnings +import logging from typing import Optional, List, Dict, Any import pandas as pd # type: ignore from Bio import SeqIO # type: ignore @@ -7,6 +8,8 @@ from Bio.Seq import Seq # type: ignore from Bio.SeqRecord import SeqRecord # type: ignore from gtfparse import read_gtf # type: ignore +LOG = logging.getLogger(__name__) + # ignore warnings from read_gtf warnings.filterwarnings(action="ignore", category=FutureWarning) @@ -23,7 +26,7 @@ def 
complement(res: str) -> str: """ translate_dict = {"A": "T", "T": "A", "U": "A", "G": "C", "C": "G"} if res not in translate_dict: - print(f"Unknown character, {res}") + LOG.warning("Unknown character, %s", res) raise ValueError return translate_dict[res] @@ -40,7 +43,9 @@ def seq_complement(sequence: str) -> Optional[str]: """ if sequence is None: return None - _ = "".join([complement(char) for char in str(sequence)])[::-1] # reverse string # noqa: E501 + _ = "".join([ + complement(char) for char in str(sequence) + ])[::-1] # reverse string return _ @@ -179,6 +184,9 @@ class CDNAGen: # alongside the names of any optional keys \ # which appeared in the attribute column gtf_df = read_gtf(self.gtf) + + gtf_df = gtf_df.to_pandas() # convert polars df to pandas df + gtf_df["Binding_Probability"] = pd.to_numeric( gtf_df["Binding_Probability"] ) # convert to numeric @@ -225,7 +233,7 @@ class CDNAGen: """ SeqIO.write(self.fasta_records, self.output_fasta, "fasta") - print(f"Fasta file successfully written to: {self.output_fasta}") + LOG.info("Fasta file successfully written to: %s", self.output_fasta) def write_csv(self) -> None: """Write the copy number information to a csv file. 
@@ -237,5 +245,5 @@ class CDNAGen: """ df_to_save = self.gtf_df[["cdna_ID", "Transcript_Copy_Number"]] df_to_save.to_csv(self.output_csv, index=False) - print(f"Copy number csv file successfully written to: \ - {self.output_csv}") + LOG.info("Copy number csv file successfully written to: %s", + self.output_csv) diff --git a/cdna/cli.py b/cdna/cli.py index ce568d8..5416b78 100644 --- a/cdna/cli.py +++ b/cdna/cli.py @@ -3,10 +3,17 @@ import argparse import logging -from cdna.cdna import CDNAGen +logging.basicConfig( + format='[%(asctime)s: %(levelname)s] %(message)s \ + (module "%(module)s")', + level=logging.INFO, +) +LOG = logging.getLogger(__name__) +from cdna.cdna import CDNAGen # noqa: E402,E501 # pylint:disable=wrong-import-position -def cdna_parser() -> CDNAGen: + +def main(): """Parse sequences for cDNA generator. Parses command line arguments for cDNA generation. @@ -35,27 +42,16 @@ def cdna_parser() -> CDNAGen: "-ocsv", "--output_csv", help="output fasta file", required=True ) args = parser.parse_args() - # Print parser arguments - print(" \n".join(f"{k}={v}" for k, v in vars(args).items())) - print() - cdna_inst = CDNAGen( + + LOG.info("Running cDNA generator...") + CDNAGen( ifasta=args.input_fasta, igtf=args.input_gtf, icpn=args.input_copy_number, ocsv=args.output_csv, ofasta=args.output_fasta, ) - return cdna_inst if __name__ == "__main__": - logging.basicConfig( - format='[%(asctime)s: %(levelname)s] %(message)s \ - (module "%(module)s")', - level=logging.INFO, - ) - LOG = logging.getLogger(__name__) - print("**********************") - print("Running cDNA generator") - print("**********************") - cdna_parser() + main() diff --git a/exampleInput.sh b/exampleInput.sh deleted file mode 100644 index 547236c..0000000 --- a/exampleInput.sh +++ /dev/null @@ -1,4 +0,0 @@ -python cdna/cli.py -ifa test_files/yeast_example.fa \ - -icpn test_files/copy_number_input.csv \ - -igt test_files/Example_GTF_Input.GTF \ - -ofa test_files/cDNA.fasta -ocsv 
test_files/cDNA.csv \ No newline at end of file diff --git a/setup.py b/setup.py index 73e2bfd..d57b413 100644 --- a/setup.py +++ b/setup.py @@ -1,16 +1,29 @@ +"""Set up project.""" +from pathlib import Path from setuptools import setup, find_packages -with open('requirements.txt') as f: - required = f.read().splitlines() +project_root_dir = Path(__file__).parent.resolve() + +with open(project_root_dir / "requirements.txt", + "r", encoding="utf-8") as f: + INSTALL_REQUIRED = f.read().splitlines() + +URL = ('https://git.scicore.unibas.ch/zavolan_group/' + 'tools/cdna-generator') setup( - name='cdna', - url='https://gitlab.com/my_user_name/my_package.git', - author='My Name', - author_email='me@email.org', - description='Brief package description', + name='cdna-generator', + version='0.1.1', + url=URL, license='MIT', - version='1.0.0', - packages=find_packages(), # this will autodetect Python packages from the directory tree, e.g., in `code/` - install_requires=required, # add here packages that are required for your package to run, including version or range of versions + author='Eric Boittier, Bastian Wagner, Quentin Badolle', + author_email='me@email.org', + description='cDNA generator', + packages=find_packages(), + install_requires=INSTALL_REQUIRED, + entry_points={ + 'console_scripts': [ + 'cdna-generator=cdna.cli:main' + ] + } ) diff --git a/test_files/.ipynb_checkpoints/transcript-checkpoint.fasta b/test_files/.ipynb_checkpoints/transcript-checkpoint.fasta deleted file mode 100644 index bb37ee2..0000000 --- a/test_files/.ipynb_checkpoints/transcript-checkpoint.fasta +++ /dev/null @@ -1,8 +0,0 @@ ->1 -GAUAGCUAGAGGAUUCUCAGAGGAGAAGCUAGAGGAGCUAGAGGAGCUAGAGGAGCUAGAGGAGCUAGAGG ->2 -AGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAGCUAGAGGAGCUAGAGGAGCUAGAGG ->3 -AGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAGCUAGAGG ->4 -AGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAUAGCUAGAGGAGCUAGAGGAGCUAGAGG diff --git a/test_files/.gitkeep b/tests/test_files/.gitkeep similarity 
index 100% rename from test_files/.gitkeep rename to tests/test_files/.gitkeep diff --git a/test_files/Example_GTF_Input.GTF b/tests/test_files/Example_GTF_Input.GTF similarity index 100% rename from test_files/Example_GTF_Input.GTF rename to tests/test_files/Example_GTF_Input.GTF diff --git a/test_files/cDNA.csv b/tests/test_files/cDNA.csv similarity index 100% rename from test_files/cDNA.csv rename to tests/test_files/cDNA.csv diff --git a/test_files/cDNA.fasta b/tests/test_files/cDNA.fasta similarity index 100% rename from test_files/cDNA.fasta rename to tests/test_files/cDNA.fasta diff --git a/test_files/copy_number_file.csv b/tests/test_files/copy_number_file.csv similarity index 100% rename from test_files/copy_number_file.csv rename to tests/test_files/copy_number_file.csv diff --git a/test_files/copy_number_input.csv b/tests/test_files/copy_number_input.csv similarity index 100% rename from test_files/copy_number_input.csv rename to tests/test_files/copy_number_input.csv diff --git a/test_files/transcript.fasta b/tests/test_files/transcript.fasta similarity index 100% rename from test_files/transcript.fasta rename to tests/test_files/transcript.fasta diff --git a/test_files/yeast_example.fa b/tests/test_files/yeast_example.fa similarity index 100% rename from test_files/yeast_example.fa rename to tests/test_files/yeast_example.fa -- GitLab