Skip to content
Snippets Groups Projects
Commit 2c662762 authored by Laura Urbanska's avatar Laura Urbanska
Browse files

updated script names and started updated execution file

parent 83f6b6f3
No related branches found
No related tags found
No related merge requests found
### Imports ###
import os
import transkript_extractor as te
import Exon_length_filter as elf
import representative_v4 as rtcl
### Script ###
def exe(
    file_name="test",
    source_pathway_name=os.getcwd(),
    deposit_pathway_name=os.getcwd(),
    Input_free=True,
):
    """Run extraction, support-level filtering and exon-length filtering in order.

    The three stages are delegated to the imported modules:

    1. ``te.extract_transkript`` is called with the given file name and
       pathways; only the file name it returns is used afterwards.
    2. ``rtcl.find_repr_by_SupportLevel`` is given the path
       ``<deposit_pathway_name>/<file_name>_intermediate_file.txt``.
    3. ``elf.exon_length_filter`` receives the result of step 2 as ``gen_dict``.

    Note that both pathway defaults are evaluated once, when the function is
    defined, not on every call.

    Args:
        file_name: Name forwarded to the extractor (default ``"test"``).
        source_pathway_name: Source pathway forwarded to the extractor and
            to the exon length filter.
        deposit_pathway_name: Deposit pathway forwarded to the extractor and
            to the exon length filter; also the directory in which the
            intermediate file is looked up.
        Input_free: Forwarded unchanged to the extractor and the exon length
            filter.

    Returns:
        A tuple ``(file_name, source_pathway_name, deposit_pathway_name)``
        where ``file_name`` is the value returned by the extractor and the
        two pathways are the ones passed in by the caller.
    """
    # The extractor returns three values; the two pathways it reports are
    # deliberately not used below, the caller-supplied ones are.
    file_name, _extracted_source, _extracted_deposit = te.extract_transkript(
        file_name,
        source_pathway_name,
        deposit_pathway_name,
        Input_free=Input_free,
    )

    intermediate_name = file_name + "_intermediate_file.txt"
    intermediate_path = os.path.join(deposit_pathway_name, intermediate_name)

    print("Transcripts are filterd based on transcipt score please wait...")
    candidates_by_gene = rtcl.find_repr_by_SupportLevel(intermediate_path)
    print("Transcripts filtered\n")

    elf.exon_length_filter(
        file_name,
        source_pathway_name,
        deposit_pathway_name,
        gen_dict=candidates_by_gene,
        Input_free=Input_free,
    )
    return file_name, source_pathway_name, deposit_pathway_name
### From console ###
##D:\\Uni\\Sem 9\\Programing in the Life sciences\\Projekt\\Intermediat Files
# Run the whole pipeline with exe()'s default arguments when this file is
# executed directly as a script (nothing happens on import).
if __name__ == "__main__":
    exe()
\ No newline at end of file
......@@ -5,11 +5,11 @@ Version 1.1.0"""
import re
import os
import transkript_extractor as te
import transcript_extractor as te
### Functions ###
def exon_length_calculator(entry):
"""This funtion finds the start and end cordinates of the exon and uses them to calculate its lenght"""
"""This function finds the start and end cordinates of the exon and uses them to calculate its length"""
try:
find_exon_coordinates = re.compile("\t\d{1,15}\t")
#this defines the pattern of the coordinates
......@@ -25,12 +25,12 @@ def exon_length_calculator(entry):
try_find_end_coordinates = find_exon_coordinates.search(sub_entry)
end_coordinates = int(try_find_end_coordinates[0].replace("\t",""))
#these two lines find the end coordinates and turn them into an int
exon_lenght = end_coordinates-start_coordinates
exon_length = end_coordinates-start_coordinates
#this line calculates the exon length
except:
print("\n\nIn the following enty only one or no valid coordinates could be found:\n",entry,"the value will be set to NA")
exon_lenght = "NA"
return(exon_lenght)
exon_length = "NA"
return(exon_length)
def exon_fider(entry):
"""This funtion determines if a given entry belongs to an exon
......@@ -46,7 +46,7 @@ def exon_fider(entry):
return(try_exon_test)
def __longest_transcript_finder(current_exon_length,longest_transcript,longest_transcript_ID,old_transcript_ID):
"""This funtion encapsulates an opperation that has to be carried out at several point ind the exon_length_filter funktion and servers to make that funktion more modular"""
"""This funtion encapsulates an operation that has to be carried out at several points in the exon_length_filter function and serves to make that function more modular"""
if current_exon_length > longest_transcript:
#This condition updates the most promising candidate for
#being the representative transcript
......@@ -65,7 +65,7 @@ def _representative_transcript_csv (representative_transcript,file_name = "test"
def _exon_length_filter(file_name = "test",source_pathway_name = os.getcwd(),deposit_pathway_name =os.getcwd(),gen_dict = {"ENSG00000160072":["ENST00000673477","ENST00000472194","ENST00000378736","ENST00000308647","ENST00000442483"],"ENSG00000225972":["ENST00000416931"],"ENSG00000279928":["ENST00000624431","ENST00000424215"],"ENSG00000142611":["ENST00000378391","ENST00000607632","ENST00000511072"]}):
"""This funtion selects only the transcripts for a dictionar that have the longest total mRNA"""
"""This funtion selects only the transcripts for a dictionary that have the longest total mRNA"""
bar,start_time = te.bar_builder(length_multiplyer = 3)
total_genes = len(gen_dict)
gens_done = 0
......@@ -133,7 +133,7 @@ def _exon_length_filter(file_name = "test",source_pathway_name = os.getcwd(),dep
current_transcript_ID = te.transcript_ID_finder(entry)
except:
continue
#The block above searches for a trnascript ID in the current enty
#The block above searches for a transcript ID in the current entry
if current_transcript_ID in transcript_IDs:
#This condition test if the Transcript is one of the
......@@ -185,4 +185,4 @@ if __name__ == "__main__":
exon_length_filter()
#This line allows the file to be executed on its own also from
\ No newline at end of file
#This line allows the file to be executed on its own also from
%% Cell type:code id: tags:
``` python
import os
import argparse
import transcript_extractor as te
import exon_length_filter as elf
import representative as rtcl
import representative as rp
if __name__ == '__main__':
    # Command-line interface of the transcript sampler. Every option below
    # is mandatory; argparse exits with an error if one is missing.
    parser = argparse.ArgumentParser(
        description="transcript sampler",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # (option flag, help text) pairs, registered in this order.
    # The expression level option is currently disabled:
    #     ("--expression_level", "csv file with expression level"),
    required_options = (
        ("--annotation", "gtf file with genome annotation"),
        ("--output_csv", "output csv file"),
        ("--output_gtf", "output gtf file"),
        ("--transcript_number", "total number of transcripts to sample"),
    )
    for flag, description in required_options:
        parser.add_argument(flag, required=True, help=description)

    # Values are kept as strings; no type conversion is requested.
    args = parser.parse_args()
def exe(input_file, csv, gtf, transcript_nr, Input_free=True):
    """Extract transcripts from ``input_file`` and filter them to representatives.

    Steps, in order:

    1. ``te.extract_transcript`` is run on ``input_file``.
    2. ``rtcl.find_repr_by_SupportLevel`` is given the path
       ``input_file + "_intermediate_file.txt"``.
    3. ``elf.exon_length_filter`` receives the result of step 2 as
       ``gen_dict`` together with the file name returned by step 1.

    The previous version passed ``Input_free = Input_free`` to the extractor
    although no such name was defined in the function, which raised a
    ``NameError`` on the first statement. ``Input_free`` is now an optional
    keyword parameter whose default (``True``) is the value that was already
    hard-coded for the exon length filter.

    Args:
        input_file: Passed to the extractor as its first argument and used
            as the prefix of the intermediate file path.
        csv: Currently unused by this function.
            NOTE(review): presumably the ``--output_csv`` target - confirm
            and wire it up.
        gtf: Currently unused by this function.
            NOTE(review): presumably the ``--output_gtf`` target - confirm.
        transcript_nr: Currently unused by this function.
            NOTE(review): presumably ``--transcript_number`` - confirm.
        Input_free: Forwarded to the extractor and to the exon length
            filter. Defaults to ``True``.

    Returns:
        None.
    """
    # The extractor returns three values; only the file name is needed here.
    file_name, _source_pathway, _deposit_pathway = te.extract_transcript(
        input_file,
        deposit_pathway_name=True,
        Input_free=Input_free,
    )

    intermediate_path = input_file + "_intermediate_file.txt"

    print("Transcripts are filtered based on transcript score. Please wait...")
    pre_filter_representative_transcripts_dict = rtcl.find_repr_by_SupportLevel(
        intermediate_path
    )
    print("Transcripts filtered\n")

    elf.exon_length_filter(
        file_name,
        gen_dict=pre_filter_representative_transcripts_dict,
        Input_free=Input_free,
    )
```
%% Output
usage: ipykernel_launcher.py [-h] --annotation ANNOTATION --expression_level
EXPRESSION_LEVEL --output_csv OUTPUT_CSV
--output_gtf OUTPUT_GTF --transcript_number
TRANSCRIPT_NUMBER
ipykernel_launcher.py: error: the following arguments are required: --annotation, --expression_level, --output_csv, --output_gtf, --transcript_number
An exception has occurred, use %tb to see the full traceback.
SystemExit: 2
%% Cell type:code id: tags:
``` python
```
File moved
......@@ -11,7 +11,7 @@ import time
def __parameter_editor(file_name,source_pathway_name,deposit_pathway_name):
"""This function allows for chaging the parameters after running the program"""
"""This function allows for changing the parameters after running the program"""
while True:
print("The program will run with the following parameters:\nFile name:\t\t",file_name,"\nSource pathway:\t",source_pathway_name,"\nDeposit pathway:\t",deposit_pathway_name,"\n")
parameter_conformation = input("To continue with these parameters input [continue or c] to change them input [edit]\n>")
......@@ -273,7 +273,7 @@ def _transcript_extractor (file_name,source_pathway_name,deposit_pathway_name):
print("The transcripts have been collected")
def extract_transkript (file_name = "test",source_pathway_name = os.getcwd(),deposit_pathway_name = False,Input_free = False):
def extract_transcript(file_name = "test",source_pathway_name = os.getcwd(),deposit_pathway_name = False,Input_free = False):
"""This it the overall exetutable funtion that will execute the transcript extraction process for a given file with all checks.
Expected input:
file_name: str ; default = test #the name of the gft file you want to look at
......@@ -305,7 +305,7 @@ def extract_transkript (file_name = "test",source_pathway_name = os.getcwd(),dep
#### Dev part ####
if __name__ == "__main__":
extract_transkript()
extract_transcript()
#This line allows the file to be executed on its own also from
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment