Skip to content
Snippets Groups Projects
Commit 56f672d1 authored by Larissa Glass
Browse files

Add original transcript id to output transcripts

parent 8624746f
No related branches found
No related tags found
1 merge request: !12 Add original transcript id to output transcripts
...@@ -46,6 +46,10 @@ def output_filename(filename: str) -> str: ...@@ -46,6 +46,10 @@ def output_filename(filename: str) -> str:
outfile = "generated_" + filepath.name outfile = "generated_" + filepath.name
else: else:
raise NotImplementedError() raise NotImplementedError()
if Path(outfile).exists():
raise FileExistsError(f"The output file {outfile} already exists.")
return outfile return outfile
......
...@@ -22,6 +22,8 @@ def read_abundances(transcripts_file: str) -> pd.DataFrame: ...@@ -22,6 +22,8 @@ def read_abundances(transcripts_file: str) -> pd.DataFrame:
return pd.read_table(transcripts_file, header=None, names=cols) return pd.read_table(transcripts_file, header=None, names=cols)
elif transcripts_file.endswith(".csv"): elif transcripts_file.endswith(".csv"):
return pd.read_csv(transcripts_file, header=None, names=cols) return pd.read_csv(transcripts_file, header=None, names=cols)
else:
raise ValueError("File type needs to be either csv or tsv")
def filter_df(df: pd.DataFrame, transcripts: list = []) -> pd.DataFrame: def filter_df(df: pd.DataFrame, transcripts: list = []) -> pd.DataFrame:
...@@ -117,6 +119,9 @@ class Gtf: ...@@ -117,6 +119,9 @@ class Gtf:
def read_file(self, annotations_file: str) -> None: def read_file(self, annotations_file: str) -> None:
# for large annotation files, iterate over lines and filter before saving to dataframe # for large annotation files, iterate over lines and filter before saving to dataframe
if not annotations_file.endswith("gtf"):
raise ValueError("File type needs to be gtf")
reader = pd.read_table( reader = pd.read_table(
annotations_file, annotations_file,
sep="\t", sep="\t",
...@@ -260,7 +265,7 @@ class TranscriptGenerator: ...@@ -260,7 +265,7 @@ class TranscriptGenerator:
ids, inclusions, counts = self._get_unique_inclusions() ids, inclusions, counts = self._get_unique_inclusions()
with open(filename, "a") as fh: with open(filename, "a") as fh:
for transcript_id, transcript_count in zip(ids, counts): for transcript_id, transcript_count in zip(ids, counts):
fh.write(f"{transcript_id},{transcript_count}\n") fh.write(f"{transcript_id},{self.id},{transcript_count}\n")
def generate_annotations(self, filename: str) -> None: def generate_annotations(self, filename: str) -> None:
ids, inclusions, counts = self._get_unique_inclusions() ids, inclusions, counts = self._get_unique_inclusions()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment