"""Command-line entry point that chains the transcript sampler steps."""
import argparse

import transcript_extractor as te
import exon_length_filter as elf
import representative as rtcl
import poisson_sampling as ps
import writegtf as gt
import match_reprtranscript_expressionlevel as ma


def exe(input_file, csv, gtf, transcript_nr):
    """Run the transcript sampling steps in order for one annotation file.

    The steps are: transcript extraction, selection of representative
    transcripts by support level, exon length filtering, Poisson sampling
    and writing of the output GTF.

    Args:
        input_file: GTF file with the genome annotation. It is also used as
            the prefix of the intermediate file path.
        csv: Path of the output CSV file passed to the sampling step and then
            to the GTF writer.
        gtf: Path of the output GTF file.
        transcript_nr: Total number of transcripts to sample.
    """
    # The original passed ``Input_free=Input_free``, but no such name exists
    # in this module, so every call raised NameError.  ``True`` matches the
    # value given to ``exon_length_filter`` below.
    file_name, _source_pathway, _deposit_pathway = te.extract_transcript(
        input_file, deposit_pathway_name=True, Input_free=True
    )
    # NOTE(review): presumably this is the file written by the extractor
    # above -- confirm the naming convention against transcript_extractor.
    intermediate_file = input_file + "_intermediate_file.txt"

    print("Transcripts are filtered based on transcript score. Please wait...")
    representative_transcripts = rtcl.find_repr_by_SupportLevel(intermediate_file)
    print("Transcripts filtered\n")

    elf.exon_length_filter(
        file_name, gen_dict=representative_transcripts, Input_free=True
    )

    tsv_input = ma.output_tsv()
    print("Poisson sampling of transcripts")
    ps.transcript_sampling(transcript_nr, tsv_input, csv)
    print("output csv file ready")

    print("writing output gtf file")
    gt.gtf_file_writer(input_file, csv, gtf)


def main():
    """Parse the command-line arguments and run :func:`exe`."""
    parser = argparse.ArgumentParser(
        description="transcript sampler",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--annotation", required=True, help="gtf file with genome annotation"
    )
    parser.add_argument("--output_csv", required=True, help="output csv file")
    parser.add_argument("--output_gtf", required=True, help="output gtf file")
    # Convert at the CLI boundary so a non-numeric value is rejected by
    # argparse instead of reaching the sampling step as a string.
    parser.add_argument(
        "--transcript_number",
        required=True,
        type=int,
        help="total number of transcripts to sample",
    )
    args = parser.parse_args()
    exe(args.annotation, args.output_csv, args.output_gtf, args.transcript_number)


if __name__ == "__main__":
    main()