diff --git a/README.md b/README.md index 64b07108ff8a801af67d502a305b75d65dc41e8f..e962aad785599c6265747112063e30b85d5c4e7d 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,10 @@ Description of the module: The function of this module is to generate cdDNA based on mRNA transcript seuqences and the coresponding priming probabilities. +**Example usage** + + python ../cdna/cli.py -ifa yeast_example.fa -icpn copy_number_input.csv -igt Example_GTF_Input.GTF -ofa cDNA.fasta -ocsv cDNA.csv + **Input files** diff --git a/cdna/cdna.py b/cdna/cdna.py index a6025f8f86b5a9634b809e8fa7b6a1f0dd63a041..4a1b91267be3a7982b03c8eaf6e11dcf5ce249d6 100644 --- a/cdna/cdna.py +++ b/cdna/cdna.py @@ -1,8 +1,6 @@ import sys import warnings -import logging -from cli import parser import pandas as pd from Bio import SeqIO from Bio.Seq import Seq @@ -14,6 +12,12 @@ warnings.filterwarnings(action="ignore", category=FutureWarning) def compliment(res: str) -> str: + """ + Returns the compliment of a given DNA residue. + + :param res: DNA residue + :return: + """ translate_dict = {"A": "T", "T": "A", "U": "A", "G": "C", "C": "G"} if res not in translate_dict.keys(): print(f"Unknown character, {res}") @@ -22,6 +26,13 @@ def compliment(res: str) -> str: def seq_compliment(sequence: str) -> str: + """ + Returns the corresponding cDNA sequence for a given input by finding the + corresponding compliment base pair and reversing the input. + + :param sequence: DNA sequence + :return: cDNA sequence + """ if sequence is None: return "None" _ = "".join([compliment(char) for char in str(sequence)])[::-1] # reverse string @@ -50,6 +61,7 @@ class CDNAGen: self.add_sequences() self.add_compliment() self.add_records() + print() self.write_fasta() self.write_csv() @@ -137,22 +149,14 @@ class CDNAGen: self.df_input_GTF = df_input_GTF def write_fasta(self): - print(self.fasta_records) SeqIO.write(self.fasta_records, self.output_fasta, "fasta") + print(f"Fasta file successfully written to: {self.output_fasta}") def write_csv(self): self.df_input_GTF[["cdna_ID", "Transcript_Copy_Number"]].to_csv( self.output_csv, index=False ) + print(f"Copy number csv file successfully written to: {self.output_csv}") - def return_output(self): - return self.output_fasta, self.output_csv -if __name__ == "main": - logging.basicConfig( - format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")', - level=logging.INFO, - ) - LOG = logging.getLogger(__name__) - cnda_object = parser() diff --git a/cdna/cli.py b/cdna/cli.py index 8c533020acc431a65bdf5766a42eb4a94a1c4351..f3100e0d1afa3a238d834f590a3811e0a1311797 100644 --- a/cdna/cli.py +++ b/cdna/cli.py @@ -1,5 +1,7 @@ -import cdna import argparse +import logging + +from cdna import CDNAGen def parser(): @@ -7,17 +9,32 @@ def parser(): prog="cDNA generator", description="Generate cDNA sequences based on primer probabilities.", ) - parser.add_argument("--input_fasta_file", help="genome fasta file") - parser.add_argument("--input_gtf", help="gtf file") - parser.add_argument("--output_fasta_name", help="output fasta file") - parser.add_argument("--input_copy_number", help="input copy number (csv) file") - parser.add_argument("--output_csv_name", help="output fasta file") + parser.add_argument("-ifa", "--input_fasta", help="genome fasta file", required=True) + parser.add_argument("-igtf", "--input_gtf", help="gtf file", required=True) + parser.add_argument("-ofa", "--output_fasta", help="output fasta file", required=True) + parser.add_argument("-icpn", "--input_copy_number", help="input copy number (csv) file", required=True) + parser.add_argument("-ocsv", "--output_csv", help="output fasta file", required=True) args = parser.parse_args() - CDNA = cdna.cdna.CDNAGen( - ifasta=args["input_fasta_file"], - igtf=args["input_gtf_file"], - icpn=args["input_copy_number"], - ocsv=args["output_csv_name"], - ofasta=args["output_fasta_name"], + + print(' \n'.join(f'{k}={v}' for k, v in vars(args).items())) + print() + CDNA = CDNAGen( + ifasta=args.input_fasta, + igtf=args.input_gtf, + icpn=args.input_copy_number, + ocsv=args.output_csv, + ofasta=args.output_fasta, ) return CDNA + + +if __name__ == "__main__": + logging.basicConfig( + format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")', + level=logging.INFO, + ) + LOG = logging.getLogger(__name__) + print("**********************") + print("Running cDNA generator") + print("**********************") + cnda_object = parser() diff --git a/test_files/cDNA.fasta b/test_files/cDNA.fasta index 2168d9f9c386c2bf4ef6db01ea350623b1a055ef..86b768197fd6223ebbc36c43a816e744be488339 100644 --- a/test_files/cDNA.fasta +++ b/test_files/cDNA.fasta @@ -121,3 +121,9 @@ CTTCTTCTTTCAACTTTCCGCTGTTTCTTCCATTTATCTCCACGGGTGTGTTGGAAGAAC ATTCCTCATGCACCGAATGTGCGTTTACAGGAATGTCTTGTCCAACGTTATCTGGAGTTG TTTCTGTCTTGGCAGTAATGCTTAATTTCCTGATTTCATTGTTGTTCATCGGATCAACAT ACATAAAAAAAAAAAAAAAAAA +>Transcript_3_0 +None +>Transcript_4_0 +None +>Transcript_5_0 +None