From 0db28afa685a4047050960946baad1aa251dbffc Mon Sep 17 00:00:00 2001 From: Eric Boittier <ericdavid.boittier@unibas.ch> Date: Mon, 12 Dec 2022 15:37:25 +0100 Subject: [PATCH] setup.py uses requirements.txt --- Dockerfile | 10 +++++ cdna/cdna.py | 112 +++++++++++++++++++++++++++++++++++++---------- requirements.txt | 3 +- setup.py | 5 ++- 4 files changed, 104 insertions(+), 26 deletions(-) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..e71e517 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,10 @@ +FROM python:3.8-slim-buster + +MAINTAINER EricBoittier + +ENV PATH=$PATH:/usr/games/ + +COPY . . + + + diff --git a/cdna/cdna.py b/cdna/cdna.py index 4a1b912..c5f6d31 100644 --- a/cdna/cdna.py +++ b/cdna/cdna.py @@ -13,10 +13,12 @@ warnings.filterwarnings(action="ignore", category=FutureWarning) def compliment(res: str) -> str: """ - Returns the compliment of a given DNA residue. - - :param res: DNA residue - :return: + Returns the cDNA compliment of a given base pair + Args: + res: residue code. + + Returns: corresponding cDNA residue. + """ translate_dict = {"A": "T", "T": "A", "U": "A", "G": "C", "C": "G"} if res not in translate_dict.keys(): @@ -27,11 +29,14 @@ def compliment(res: str) -> str: def seq_compliment(sequence: str) -> str: """ - Returns the corresponding cDNA sequence for a given input by finding the - corresponding compliment base pair and reversing the input. + Returns the corresponding cDNA sequence by finding the complimentary + base pairs and returning the reversed sequence. + + Args: + sequence: sequence to be converted into cDNA. + + Returns: corresponding cDNA sequence. - :param sequence: DNA sequence - :return: cDNA sequence """ if sequence is None: return "None" @@ -51,21 +56,26 @@ class CDNAGen: # variables self.fasta_dict = None self.fasta_records = None - + self.df_input_GTF = None self.run() - def run(self): + def run(self) -> None: + """ + Executes the cDNA workflow. + Returns: None + + """ self.read_csv() self.read_fasta() self.read_gtf() self.add_sequences() self.add_compliment() self.add_records() - print() + print() # blank line for pretty printing self.write_fasta() self.write_csv() - def add_records(self): + def add_records(self) -> None: self.fasta_records = [] for index, row in self.df_input_GTF.iterrows(): if row["compliment"] is not None: @@ -78,36 +88,79 @@ class CDNAGen: ) self.fasta_records.append(record) - def add_sequences(self): + def add_sequences(self) -> None: + """ + Adds the sequence for a given priming site. + Returns: None + + """ self.df_input_GTF["priming_site"] = self.df_input_GTF.apply( lambda row: self.read_primingsite(row["seqname"], row["start"]), axis=1, ) - def add_compliment(self): + def add_compliment(self) -> None: + """ + Adds the complimentary cDNA sequence. + Returns: None + + """ self.df_input_GTF["compliment"] = self.df_input_GTF["priming_site"].apply( lambda x: seq_compliment(x) ) - def read_primingsite(self, sequence, start): + def read_primingsite(self, sequence: str, start: int) -> None: + """Read a fasta file from a given start character + + Reads a fasta sequence with ID (sequence) and returns the + sequence starting from the index start. + + Args: + sequence: sequence ID to be read. + start: start of the sequence. + + Returns: None + + """ if sequence not in self.fasta_dict.keys(): return None _ = self.fasta_dict[sequence].seq[start:] return _ - def read_fasta(self): + def read_fasta(self) -> None: + """Read a given fasta file. + + Wrapper for SeqIO.parse. + + Returns: None + + """ record = SeqIO.parse(self.fasta, "fasta") records = list(record) self.fasta_dict = {x.name: x for x in records} - def read_csv(self): + def read_csv(self) -> None: + """ Reads a given copy number csv file + + Wrapper for Pandas read_csv. + + Returns: None + + """ df_input_CSV = pd.read_csv(self.cpn, index_col=False) df_input_CSV = ( df_input_CSV.reset_index() ) # make sure indexes pair with number of rows self.df_input_CSV = df_input_CSV - def read_gtf(self): + def read_gtf(self) -> None: + """Read and process the GTF file. + + Reads a GTF file and determines copy numbers from normalized probabilities. + + Returns: None + + """ # returns GTF with essential columns such as "feature", "seqname", "start", "end" # alongside the names of any optional keys which appeared in the attribute column df_input_GTF = read_gtf(self.gtf) @@ -132,7 +185,7 @@ class CDNAGen: id_CSV = str(row["seqname"]).split("_")[1] # Calculate Normalized_Binding_Probability and add to GTF dataframe df_input_GTF.loc[index, "Normalized_Binding_Probability"] = ( - row["Binding_Probability"] / df_normalization_bind_probablility[id_GTF] + row["Binding_Probability"] / df_normalization_bind_probablility[id_GTF] ) # Calculate Normalized_Binding_Probability and add to GTF dataframe csv_transcript_copy_number = self.df_input_CSV.loc[ @@ -148,15 +201,26 @@ class CDNAGen: self.df_input_GTF = df_input_GTF - def write_fasta(self): + def write_fasta(self) -> None: + """Writes cDNA fasta records to file. + + Wrapper for SeqIO.write. + + Returns: None + + """ SeqIO.write(self.fasta_records, self.output_fasta, "fasta") print(f"Fasta file successfully written to: {self.output_fasta}") - def write_csv(self): + def write_csv(self) -> None: + """Writes the copy number information to a csv file. + + Wrapper for Pandas to_csv. + + Returns: None + + """ self.df_input_GTF[["cdna_ID", "Transcript_Copy_Number"]].to_csv( self.output_csv, index=False ) print(f"Copy number csv file successfully written to: {self.output_csv}") - - - diff --git a/requirements.txt b/requirements.txt index 3abd004..d9abb6b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ gtfparse -biopython \ No newline at end of file +biopython +pandas diff --git a/setup.py b/setup.py index 96ecc09..73e2bfd 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,8 @@ from setuptools import setup, find_packages +with open('requirements.txt') as f: + required = f.read().splitlines() + setup( name='cdna', url='https://gitlab.com/my_user_name/my_package.git', @@ -9,5 +12,5 @@ setup( license='MIT', version='1.0.0', packages=find_packages(), # this will autodetect Python packages from the directory tree, e.g., in `code/` - install_requires=[], # add here packages that are required for your package to run, including version or range of versions + install_requires=required, # add here packages that are required for your package to run, including version or range of versions ) -- GitLab