Skip to content
Snippets Groups Projects
Commit d9b2b067 authored by Jakob Rien's avatar Jakob Rien
Browse files

Replace find_representative_transcripts.py

parent 8546d6df
No related branches found
No related tags found
No related merge requests found
#### Find representative transcripts #### #### Find representative transcripts ####
"""Version 0.0.1""" """Version 1.0.0"""
### Imports ### ### Imports ###
import argparse
### Functions ### ### Functions ###
...@@ -35,7 +36,7 @@ def find_in_attributs (attributs,look_for): ...@@ -35,7 +36,7 @@ def find_in_attributs (attributs,look_for):
index = attributs.index(look_for)+1 index = attributs.index(look_for)+1
return attributs[index] return attributs[index]
except: except:
print("ERROR in findinge",look_for,"in the entry the return was set to NA\n",attributs) #print("No",look_for,"in the entry the return was set to NA\n",attributs)
return "NA" return "NA"
def exon_length(entry): def exon_length(entry):
...@@ -94,7 +95,8 @@ def get_rep_trans(file_name = "test"): ...@@ -94,7 +95,8 @@ def get_rep_trans(file_name = "test"):
entry = line.split("\t") entry = line.split("\t")
#removes expected but unneeded entries #removes expected but unneeded entries
if len(entry) == 1 or entry[2] in ["CDS","stop_codon","five_prime_utr","three_prime_utr","start_codon"]: exp_unneed = ["CDS","stop_codon","five_prime_utr","three_prime_utr","start_codon",'Selenocysteine']
if len(entry) == 1 or entry[2] in exp_unneed:
continue continue
#this function turns the less organized part of the entry into a usable list #this function turns the less organized part of the entry into a usable list
...@@ -179,13 +181,36 @@ def get_rep_trans(file_name = "test"): ...@@ -179,13 +181,36 @@ def get_rep_trans(file_name = "test"):
rep_transcripts = _re_format(rep_trans) rep_transcripts = _re_format(rep_trans)
return(rep_transcripts ) return(rep_transcripts )
### Execution part ### def _test():
if __name__ == "__main__": """
This function is meant to be run as a test
Output:
prints whether the dictionary generated from the test file matches the expected result
"""
file_name = "test" file_name = "test"
rt = get_rep_trans(file_name) rt = get_rep_trans(file_name)
with open ("representative_transcripts_"+file_name+".txt","w") as rtf: expected_result = {"ENSG00000160072":"ENST00000472194","ENSG00000234396":"ENST00000442483",
for key in rt: "ENSG00000225972":"ENST00000416931","ENSG00000224315":"ENST00000428803",
rtf.write(key+"\t"+rt[key]+"\n") "ENSG00000198744":"ENST00000416718","ENSG00000279928":"ENST00000624431",
"ENSG00000228037":"ENST00000424215"}
if rt != expected_result:
print("The test fail due to not yieding the same results")
print("The results the program got\n",rt)
print("The expected results\n",expected_result)
else:
print("The test was succses full")
### Execution part ###
def _str_to_bool(value):
    """Convert a command-line token such as "True" or "False" into a bool.

    argparse hands option values over as strings, and every non-empty
    string (including "False") is truthy, so the token has to be
    interpreted explicitly.

    Input:
        value: a bool, or a string typed on the command line
    Output:
        True or False
    Raises:
        argparse.ArgumentTypeError if the token is not a recognised boolean
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string is kept falsy, as it was before this conversion existed.
    if token in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected True or False, got {!r}".format(value)
    )


def _parse_cli(argv=None):
    """Parse the command-line arguments of the script.

    Input:
        argv: list of argument strings; None makes argparse read sys.argv
    Output:
        argparse.Namespace with the attributes
        file_name (str or None) and t (bool)
    Raises:
        SystemExit (through argparse) when the arguments are invalid or
        when neither the test mode nor a file name was requested
    """
    parser = argparse.ArgumentParser(
        description="find_representativ_transcripts",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # Not marked as required here: the test mode needs no input file, so the
    # requirement is enforced below, only for the normal mode.
    parser.add_argument(
        "-file_name",
        required=False,
        default=None,
        help="gtf file with genome annotation",
    )
    # nargs="?" with const=True accepts a bare "-t" as well as the documented
    # "-t True"; the type converter makes "-t False" really mean False.
    parser.add_argument(
        "-t",
        required=False,
        nargs="?",
        const=True,
        default=False,
        type=_str_to_bool,
        help="to run the test input -t True",
    )
    args = parser.parse_args(argv)
    if not args.t and args.file_name is None:
        parser.error("the following arguments are required: -file_name")
    return args


if __name__ == "__main__":
    args = _parse_cli()
    if args.t:
        _test()
    else:
        # NOTE(review): the returned mapping is neither printed nor written
        # here -- confirm that get_rep_trans produces the output itself.
        get_rep_trans(args.file_name)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment