"""Compute priming probabilities from RIblast output and write a gff-style file."""
import argparse
from pathlib import Path

import numpy as np
import scipy.constants


class Probability:
    """Calculate the probability of priming and write the gff file.

    NOTE: the statements in this class body execute when the class is
    defined: the command line is parsed, the input file is read and the
    output file is written as a side effect of defining the class.
    """

    # Command-line interface: a single positional path argument.
    parser = argparse.ArgumentParser(
        description="Fasta-file input",
        add_help=False,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        'input_file',
        type=lambda p: Path(p).absolute(),
        metavar="PATH",
        help="path to fasta-file",
    )

    args = parser.parse_args()

    def InterPara(path):
        """Read the RIblast output file and keep only the parameter lines.

        A parameter line is any line whose first character is a digit
        (the interaction ID); every other line is skipped.

        Args:
            path: Path of the file to read.

        Returns:
            list: The selected lines, unmodified (trailing newline kept).
        """
        # The context manager closes the file even if reading fails.
        with open(path, "r") as handle:
            return [line for line in handle if line[0].isdigit()]

    data = InterPara(args.input_file)

    def InterProb(
        data_list,
        fasta_path="../inputs/transcript.fasta",
        output_path="../inputs/Potential_Priming_sites.txt",
        temperature=300.15,
    ):
        """Calculate the normalized probabilities and write the gff file.

        Each record is split on ",". The second-to-last field is read as
        the interaction energy (kcal/mol) and the last field as the
        interaction site. The energy is turned into a Boltzmann weight
        exp(-E / (R * T)), and the weights are normalized per transcript.

        The records are assumed to be grouped by transcript, in the same
        order as the headers appear in the fasta file: the first
        ``count`` records are assigned to the first header, and so on.

        Args:
            data_list (list): Parameter lines of the RIblast output.
            fasta_path: Fasta file whose ">" header lines name the
                transcripts.
            output_path: File the tab-separated result is written to.
            temperature (float): Temperature in Kelvin used for R * T.

        Returns:
            file: The (already closed) file object of the written file.

        Raises:
            ValueError: If the per-transcript record counts do not add up
                to the number of records, so records cannot be assigned
                to transcripts unambiguously.
        """
        joule_per_kcal = 4184

        # Transcript names: header lines with ">" and the newline removed.
        with open(fasta_path, "r") as fasta:
            transcript_ids = [
                line.replace(">", "").replace("\n", "")
                for line in fasta
                if line.startswith(">")
            ]

        # Number of records per transcript; a record belongs to a
        # transcript when the transcript name occurs anywhere in the line.
        counts = [
            sum(1 for record in data_list if transcript_id in record)
            for transcript_id in transcript_ids
        ]
        if sum(counts) != len(data_list):
            raise ValueError(
                "interaction records cannot be assigned to transcripts: "
                f"{sum(counts)} matches for {len(data_list)} records"
            )

        thermal_energy = scipy.constants.R * temperature  # R * T in J/mol

        weights = []  # unnormalized Boltzmann weight of every record
        sites = []    # [first, second] coordinate strings of every record
        for record in data_list:
            fields = record.split(",")

            # Site field looks like "(a-b:c-d) \n"; keep the part after
            # ":" and split it into its two coordinates.
            location = fields[-1].split(":")[1]
            location = location.replace(") ", "").replace("\n", "")
            sites.append(location.replace("-", " ").split(" "))

            energy = float(fields[-2]) * joule_per_kcal  # kcal/mol -> J/mol
            weights.append(np.exp(-energy / thermal_energy))

        # Normalize the weights within each transcript's slice of records.
        rows = []
        offset = 0
        for transcript_id, count in zip(transcript_ids, counts):
            group = range(offset, offset + count)
            total = sum(weights[index] for index in group)
            for index in group:
                rows.append(
                    (transcript_id, weights[index] / total, sites[index])
                )
            offset += count

        # Leaving the "with" block closes (and flushes) the output file.
        with open(output_path, "w+") as gff:
            for transcript_id, probability, site in rows:
                gff.write(
                    str(transcript_id)
                    + "\tRIblast\ttranscript\t"
                    + str(site[1]) + "\t"
                    + str(site[0]) + "\t"
                    + str(probability) + "\t.\t.\t.\n"
                )

        return gff

    InterProb(data)