Skip to content
Snippets Groups Projects
Commit 56f672d1 authored by Larissa Glass
Browse files

Add original transcript id to output transcripts

parent 8624746f
No related branches found
No related tags found
1 merge request: !12 Add original transcript id to output transcripts
......@@ -46,6 +46,10 @@ def output_filename(filename: str) -> str:
outfile = "generated_" + filepath.name
else:
raise NotImplementedError()
if Path(outfile).exists():
raise FileExistsError(f"The output file {outfile} already exists.")
return outfile
......
......@@ -22,6 +22,8 @@ def read_abundances(transcripts_file: str) -> pd.DataFrame:
return pd.read_table(transcripts_file, header=None, names=cols)
elif transcripts_file.endswith(".csv"):
return pd.read_csv(transcripts_file, header=None, names=cols)
else:
raise ValueError("File type needs to be either csv or tsv")
def filter_df(df: pd.DataFrame, transcripts: list = []) -> pd.DataFrame:
......@@ -117,6 +119,9 @@ class Gtf:
def read_file(self, annotations_file: str) -> None:
# for large annotation files, iterate over lines and filter before saving to dataframe
if not annotations_file.endswith("gtf"):
raise ValueError("File type needs to be gtf")
reader = pd.read_table(
annotations_file,
sep="\t",
......@@ -260,7 +265,7 @@ class TranscriptGenerator:
ids, inclusions, counts = self._get_unique_inclusions()
with open(filename, "a") as fh:
for transcript_id, transcript_count in zip(ids, counts):
fh.write(f"{transcript_id},{transcript_count}\n")
fh.write(f"{transcript_id},{self.id},{transcript_count}\n")
def generate_annotations(self, filename: str) -> None:
ids, inclusions, counts = self._get_unique_inclusions()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment