diff --git a/src/PrimingProb.py b/src/PrimingProb.py new file mode 100644 index 0000000000000000000000000000000000000000..bb755fc731b4746c76517a34a544e5f566daeaf6 --- /dev/null +++ b/src/PrimingProb.py @@ -0,0 +1,101 @@ +"""Imports.""" +import numpy as np +import scipy.constants + + +class Probability: + """Calculates the probability of priming and write the gff file.""" + + def InterPara(): + """Open the RIblast output file and read only the parameter lines. + + Args: + None + + Returns: + my_list (list): Contains all the paramter lines from RIblast + """ + myfile = open("./Docker-Files/Energies.txt", "r") # ouput of RIblast + + mylist = [] # all lines of Energies starting with an ID-number + + for myline in myfile: # Read lines containing needed data + if myline[0].isdigit(): + mylist.append(myline) + else: + continue + myfile.close() + + return(mylist) + + data = InterPara() + + def InterProb(data_list): + """Calculate the prob. and make the gff file. + + Args: + data_list (list): Contains all parameters of RIblast + + Returns: + gff (file): Gff file contains all the output information + """ + para_list = [] + + for i in range(0, len(data_list)): + x = data_list[i].split(",") + para_list.append(x) + # splitting each list item by the "," this results in a 2-D list + + for j in range(0, len(para_list)): + del para_list[j][1:-2] + # only keeps the ID-numer, the interaction + # energy, and interaction site of both sequences. (still a 2D-list) + + for d in range(0, len(para_list)): # Optimize location output + a = para_list[d][2].split(":") + a[1] = a[1].replace(") ", "") + a[1] = a[1].replace("\n", "") + a[1] = a[1].replace("-", " ") + a[1] = a[1].split(" ") + para_list[d][2] = a[1] + + for k in range(0, len(para_list)): # type-conversion of ID and E + for l in range(0, 2): + para_list[k][l] = float(para_list[k][l]) + + for z in range(0, len(para_list)): # from kcal/mol to Joule/mol + para_list[z][1] = para_list[z][1] * 4184 + + kT = scipy.constants.R * 300.15 # calculating gas constant R * T + + for u in range(0, len(para_list)): # calculating -E / RT + para_list[u][1] = (-(para_list[u][1])/kT) + + prob_list = [] # List containing all the prob. + + for h in range(0, len(para_list)): # calculating the e^(-E/kT) + probab = np.exp(para_list[h][1]) + prob_list.append(probab) + para_list[h][1] = probab + + prob_list_sum = sum(prob_list) # Sum of all probabilities + real_prob = [] + + for v in range(0, len(para_list)): # Normalized probabilities + prob_linear = (para_list[v][1])/prob_list_sum + real_prob.append(prob_linear) + para_list[v][1] = prob_linear + + gff = open("./output/Potential_Priming_sites.txt", "w+") # gff file + + for i in range(0, len(para_list)): + gff.write("Interactions: "+str(para_list[i][0]) + + "\tRIblast\ttranscript\t"+str(para_list[i][2][1])+"\t" + + str(para_list[i][2][0])+"\t" + + str(para_list[i][1])+"\t.\t.\t.\n") + + gff.close + + return gff + + InterProb(data)