# new_exe.py
# Authored by Laura Urbanska.
import argparse
import transcript_extractor as te
import exon_length_filter as elf
import representative as rtcl
import poisson_sampling as ps
import writegtf as gt
import match_reprtranscript_expressionlevel as ma
def exe(input_file, csv, gtf, transcript_nr,input_free = True):
    """Run the transcript sampling pipeline from annotation to output files.

    The steps run strictly in this order: transcript extraction,
    representative-transcript selection, exon length filtering, expression
    table creation, Poisson sampling, and GTF writing.

    Args:
        input_file: Path of the GTF genome annotation. It is also used as the
            prefix of the intermediate file name.
        csv: Output CSV file, passed to the sampling step and to the GTF
            writer.
        gtf: Output GTF file, passed to the GTF writer.
        transcript_nr: Total number of transcripts to sample.
        input_free: Forwarded as ``Input_free`` to the extraction and exon
            length filter helpers.
            NOTE(review): its exact meaning is defined in those modules.

    Returns:
        None. Progress messages are printed to standard output.
    """
    # Only the first of the three returned values is needed afterwards.
    extracted_name, _unused_source, _unused_deposit = te.extract_transcript(
        input_file,
        Input_free=input_free,
    )

    # NOTE(review): presumably written by the extraction step above -- confirm.
    intermediate_path = "".join((input_file, "_intermediate_file.txt"))

    print("Transcripts are filtered based on transcript score. Please wait...")
    representatives = rtcl.find_repr_by_SupportLevel(intermediate_path)
    print("Transcripts filtered\n")

    elf.exon_length_filter(
        extracted_name,
        gen_dict=representatives,
        Input_free=input_free,
    )

    expression_table = ma.output_tsv()

    print("Poisson sampling of transcripts")
    ps.transcript_sampling(transcript_nr, expression_table, csv)
    print("output csv file ready")

    print("writing output gtf file")
    gt.gtf_file_writer(input_file, csv, gtf)
if __name__ == '__main__':
    # Command-line entry point: parse the four required options and hand them
    # to exe() in the order (annotation, output csv, output gtf, number).
    parser = argparse.ArgumentParser(
        description="transcript sampler",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--annotation",
        required=True,
        help="gtf file with genome annotation",
    )
    parser.add_argument(
        "--output_csv",
        required=True,
        help="output csv file",
    )
    parser.add_argument(
        "--output_gtf",
        required=True,
        help="output gtf file",
    )
    # type=int makes argparse reject a non-integer count immediately with a
    # usage error, instead of passing a raw string through the whole pipeline
    # and failing (or misbehaving) only once the sampling step is reached.
    parser.add_argument(
        "--transcript_number",
        required=True,
        type=int,
        help="total number of transcripts to sample",
    )
    args = parser.parse_args()

    exe(
        args.annotation,
        args.output_csv,
        args.output_gtf,
        args.transcript_number,
    )