From 56f672d11011349f05b00dfd5702bfd6e1ab2a30 Mon Sep 17 00:00:00 2001 From: Larissa Glass <larissa.glass@unibas.ch> Date: Wed, 9 Nov 2022 15:12:53 +0000 Subject: [PATCH] Add original transcript id to output transcripts --- tsg/cli.py | 4 ++++ tsg/main.py | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tsg/cli.py b/tsg/cli.py index 57b6e0e..1a667d8 100644 --- a/tsg/cli.py +++ b/tsg/cli.py @@ -46,6 +46,10 @@ def output_filename(filename: str) -> str: outfile = "generated_" + filepath.name else: raise NotImplementedError() + + if Path(outfile).exists(): + raise FileExistsError(f"The output file {outfile} already exists.") + return outfile diff --git a/tsg/main.py b/tsg/main.py index 9f506bf..25bba5f 100644 --- a/tsg/main.py +++ b/tsg/main.py @@ -22,6 +22,8 @@ def read_abundances(transcripts_file: str) -> pd.DataFrame: return pd.read_table(transcripts_file, header=None, names=cols) elif transcripts_file.endswith(".csv"): return pd.read_csv(transcripts_file, header=None, names=cols) + else: + raise ValueError("File type needs to be either csv or tsv") def filter_df(df: pd.DataFrame, transcripts: list = []) -> pd.DataFrame: @@ -117,6 +119,9 @@ class Gtf: def read_file(self, annotations_file: str) -> None: # for large annotation files, iterate over lines and filter before saving to dataframe + if not annotations_file.endswith("gtf"): + raise ValueError("File type needs to be gtf") + reader = pd.read_table( annotations_file, sep="\t", @@ -260,7 +265,7 @@ class TranscriptGenerator: ids, inclusions, counts = self._get_unique_inclusions() with open(filename, "a") as fh: for transcript_id, transcript_count in zip(ids, counts): - fh.write(f"{transcript_id},{transcript_count}\n") + fh.write(f"{transcript_id},{self.id},{transcript_count}\n") def generate_annotations(self, filename: str) -> None: ids, inclusions, counts = self._get_unique_inclusions() -- GitLab