From 56f672d11011349f05b00dfd5702bfd6e1ab2a30 Mon Sep 17 00:00:00 2001
From: Larissa Glass <larissa.glass@unibas.ch>
Date: Wed, 9 Nov 2022 15:12:53 +0000
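Subject: [PATCH] Add original transcript id to output transcripts

Write the generator's original transcript id as an additional column
between the new transcript id and its count in the transcripts output.

Also add input/output validation:
- output_filename() raises FileExistsError if the output file already
  exists.
- read_abundances() raises ValueError if the file is neither csv nor tsv.
- Gtf.read_file() raises ValueError if the annotations file name does not
  end in "gtf".
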
---
 tsg/cli.py  | 4 ++++
 tsg/main.py | 7 ++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tsg/cli.py b/tsg/cli.py
index 57b6e0e..1a667d8 100644
--- a/tsg/cli.py
+++ b/tsg/cli.py
@@ -46,6 +46,10 @@ def output_filename(filename: str) -> str:
         outfile = "generated_" + filepath.name
     else:
         raise NotImplementedError()
+
+    if Path(outfile).exists():
+        raise FileExistsError(f"The output file {outfile} already exists.")
+        
     return outfile
 
 
diff --git a/tsg/main.py b/tsg/main.py
index 9f506bf..25bba5f 100644
--- a/tsg/main.py
+++ b/tsg/main.py
@@ -22,6 +22,8 @@ def read_abundances(transcripts_file: str) -> pd.DataFrame:
         return pd.read_table(transcripts_file, header=None, names=cols)
     elif transcripts_file.endswith(".csv"):
         return pd.read_csv(transcripts_file, header=None, names=cols)
+    else:
+        raise ValueError("File type needs to be either csv or tsv")
 
 
 def filter_df(df: pd.DataFrame, transcripts: list = []) -> pd.DataFrame:
@@ -117,6 +119,9 @@ class Gtf:
 
     def read_file(self, annotations_file: str) -> None:
         # for large annotation files, iterate over lines and filter before saving to dataframe
+        if not annotations_file.endswith("gtf"):
+            raise ValueError("File type needs to be gtf")
+
         reader = pd.read_table(
             annotations_file,
             sep="\t",
@@ -260,7 +265,7 @@ class TranscriptGenerator:
         ids, inclusions, counts = self._get_unique_inclusions()
         with open(filename, "a") as fh:
             for transcript_id, transcript_count in zip(ids, counts):
-                fh.write(f"{transcript_id},{transcript_count}\n")
+                fh.write(f"{transcript_id},{self.id},{transcript_count}\n")
 
     def generate_annotations(self, filename: str) -> None:
         ids, inclusions, counts = self._get_unique_inclusions()
-- 
GitLab