# GTF writer for scripts/find_representative_transcripts.py
# (sits between get_rep_trans() and _test() in that module).
def gtf_file_writer(original_file, output_file):
    """Write a GTF file restricted to the representative transcripts.

    Every ``gene`` record of ``original_file`` is copied unchanged.  Any
    other record is copied only when its ``transcript_id`` equals the
    representative transcript that ``get_rep_trans`` reports for the most
    recently seen gene.  Lines starting with ``#`` are dropped.

    Args:
        original_file: Path of the input GTF file.  It is read twice: once
            by ``get_rep_trans`` and once here.
        output_file: Path of the GTF file to create (overwritten if it
            already exists).

    Notes:
        * Records are attributed to a gene by file order, i.e. each gene
          line is expected to precede the features that belong to it.
        * Assumes ``get_rep_trans`` returns a mapping of gene id to
          representative transcript id, and that ``attributs_converter``
          returns a sequence whose item 2 is the feature type -- both are
          defined elsewhere in this module; confirm there.
        * Features that appear before the first gene record, or that
          belong to a gene without a representative transcript, are
          skipped instead of raising.
    """
    rep_transcript_dict = get_rep_trans(original_file)

    kept_entries = []
    # No gene seen yet; features encountered in this state cannot be
    # attributed to a gene and are skipped.
    gene_id = None

    with open(original_file, 'r') as gtf_in:
        for entry in gtf_in:
            if entry[0] == '#':
                continue

            attributes = attributs_converter(entry)
            type_ = attributes[2]

            if type_ == 'gene':
                # Remember the current gene so the following features can
                # be matched against its representative transcript.
                gene_id = find_in_attributs(attributes, 'gene_id')
                kept_entries.append(entry)
            else:
                transcript_id = find_in_attributs(attributes, 'transcript_id')
                representative_id = rep_transcript_dict.get(gene_id)
                if (representative_id is not None
                        and representative_id == transcript_id):
                    kept_entries.append(entry)

    # The output is opened only after the input has been read completely.
    # Entries still carry their original line terminators, so writelines()
    # reproduces them verbatim (write() would reject a list).
    with open(output_file, 'w') as gtf_out:
        gtf_out.writelines(kept_entries)