Skip to content
Snippets Groups Projects
Commit 0329e263 authored by Melvin Alappat's avatar Melvin Alappat
Browse files

fix: remove unnecessary class

parent 4456ea0f
No related branches found
No related tags found
1 merge request!16Issue_4
Pipeline #13906 passed
"""Imports.""" """Calculates the probability of priming and write the gff file."""
import numpy as np import numpy as np
import scipy.constants # type: ignore import scipy.constants # type: ignore
class Probability: def inter_para(input_path):
"""Calculates the probability of priming and write the gff file.""" """Open the RIblast output file and read only the parameter lines.
def inter_para(input_path): Args:
"""Open the RIblast output file and read only the parameter lines. Path to Energies.txt file
Args: Returns:
Path to Energies.txt file my_list (list): Contains the paramter lines from Energies.txt
"""
Energies = open(input_path, "r") # ouput of RIblast
Returns: mylist = [] # all lines of Energies starting with an ID-number
my_list (list): Contains the paramter lines from Energies.txt
"""
Energies = open(input_path, "r") # ouput of RIblast
mylist = [] # all lines of Energies starting with an ID-number for myline in Energies: # Read lines containing needed data
if myline[0].isdigit():
mylist.append(myline)
elif myline[0].isdigit() is False:
continue
for myline in Energies: # Read lines containing needed data Energies.close()
if myline[0].isdigit():
mylist.append(myline)
elif myline[0].isdigit() is False:
continue
Energies.close() return(mylist)
return(mylist)
def inter_prob(data_list, fasta_path, output_path): def inter_prob(data_list, fasta_path, output_path):
"""Calculate the prob. and write the gff file. """Calculate the prob. and write the gff file.
Args: Args:
data_list (list): Contains all parameters of Energies.txt data_list (list): Contains all parameters of Energies.txt
fasta_path (path): Path to fasta file fasta_path (path): Path to fasta file
output_path (path): Path to output file output_path (path): Path to output file
Returns: Returns:
gff (file): Gff file contains all the output information gff (file): Gff file contains all the output information
""" """
# count interactions per script through fasta-ID # count interactions per script through fasta-ID
fastafile = open(fasta_path, "r") fastafile = open(fasta_path, "r")
id_list = [] # contains list of transcipt IDs id_list = [] # contains list of transcipt IDs
for mylinecounter in fastafile: for mylinecounter in fastafile:
if mylinecounter.startswith(">"): if mylinecounter.startswith(">"):
a = mylinecounter a = mylinecounter
a = mylinecounter.replace(">", "") a = mylinecounter.replace(">", "")
b = a.split() b = a.split()
c = b[0] c = b[0]
id_list.append(c) id_list.append(c)
elif mylinecounter.startswith(">") is False: elif mylinecounter.startswith(">") is False:
continue continue
counter = 0 counter = 0
counter_list = [] # list of number of interactions per transcript counter_list = [] # list of number of interactions per transcript
for cc in range(0, len(id_list)): for cc in range(0, len(id_list)):
for dd in range(0, len(data_list)): for dd in range(0, len(data_list)):
if id_list[cc] in data_list[dd]: if id_list[cc] in data_list[dd]:
counter = counter + 1 counter = counter + 1
counter_list.append(counter) counter_list.append(counter)
counter = 0 counter = 0
para_list = [] para_list = []
for i in range(0, len(data_list)): for i in range(0, len(data_list)):
x = data_list[i].split(",") x = data_list[i].split(",")
para_list.append(x) para_list.append(x)
# splitting each list item by the "," this results in a 2D list # splitting each list item by the "," this results in a 2D list
for j in range(0, len(para_list)): for j in range(0, len(para_list)):
del para_list[j][1:-2] del para_list[j][1:-2]
# only keeps the ID-numer, the interaction # only keeps the ID-numer, the interaction
# energy, and interaction site of both sequences. (still a 2D-list) # energy, and interaction site of both sequences. (still a 2D-list)
start_end_index = 1 # index of start and end in the list start_end_index = 1 # index of start and end in the list
for d in range(0, len(para_list)): # Optimize location output for d in range(0, len(para_list)): # Optimize location output
a = para_list[d][2].split(":") a = para_list[d][2].split(":")
a[start_end_index] = a[start_end_index].replace(") ", "") a[start_end_index] = a[start_end_index].replace(") ", "")
a[start_end_index] = a[start_end_index].replace("\n", "") a[start_end_index] = a[start_end_index].replace("\n", "")
a[start_end_index] = a[start_end_index].replace("-", " ") a[start_end_index] = a[start_end_index].replace("-", " ")
a[start_end_index] = a[start_end_index].split(" ") a[start_end_index] = a[start_end_index].split(" ")
para_list[d][2] = a[start_end_index] para_list[d][2] = a[start_end_index]
for k in range(0, len(para_list)): # type-conversion of ID and E for k in range(0, len(para_list)): # type-conversion of ID and E
for w in range(0, 2): # "2" because first two elements in each sublist are ID and E for w in range(0, 2): # "2" because first two elements in each sublist are ID and E
para_list[k][w] = float(para_list[k][w]) para_list[k][w] = float(para_list[k][w])
joule = 4184 # 1kcal = 4184 joule joule = 4184 # 1kcal = 4184 joule
inter_energy = 1 # index of inter. energy in list inter_energy = 1 # index of inter. energy in list
for z in range(0, len(para_list)): # from kcal/mol to Joule/mol for z in range(0, len(para_list)): # from kcal/mol to Joule/mol
para_list[z][inter_energy] = para_list[z][inter_energy] * joule para_list[z][inter_energy] = para_list[z][inter_energy] * joule
T = 300.15 # Roomtemperature (27 degree celsius) in Kelvin T = 300.15 # Roomtemperature (27 degree celsius) in Kelvin
KT = scipy.constants.R * T # calculating gas constant R * T KT = scipy.constants.R * T # calculating gas constant R * T
for u in range(0, len(para_list)): # calculating -E / RT for u in range(0, len(para_list)): # calculating -E / RT
para_list[u][inter_energy] = (-(para_list[u][inter_energy])/KT) para_list[u][inter_energy] = (-(para_list[u][inter_energy])/KT)
prob_list = [] # List containing all the prob. prob_list = [] # List containing all the prob.
for h in range(0, len(para_list)): # calculating the e^(-E/kT) for h in range(0, len(para_list)): # calculating the e^(-E/kT)
probab = np.exp(para_list[h][inter_energy]) probab = np.exp(para_list[h][inter_energy])
prob_list.append(probab) prob_list.append(probab)
para_list[h][inter_energy] = probab para_list[h][inter_energy] = probab
probability_sum = 0 # variable to calculate sum of probabilities per transcipt probability_sum = 0 # variable to calculate sum of probabilities per transcipt
sum_list = [] # List containing all the sums sum_list = [] # List containing all the sums
prob_list2 = prob_list.copy() prob_list2 = prob_list.copy()
for jj in range(0, len(counter_list)): for jj in range(0, len(counter_list)):
for ii in range(0, counter_list[jj]): for ii in range(0, counter_list[jj]):
probability_sum = probability_sum + prob_list[ii] probability_sum = probability_sum + prob_list[ii]
sum_list.append(probability_sum) sum_list.append(probability_sum)
probability_sum = 0 # setting back to 0, for next interactions probability_sum = 0 # setting back to 0, for next interactions
del prob_list[0:counter_list[jj]] del prob_list[0:counter_list[jj]]
real_prob = [] # contains all the normalized probabilities real_prob = [] # contains all the normalized probabilities
for jj in range(0, len(sum_list)): for jj in range(0, len(sum_list)):
for ii in range(0, counter_list[jj]): for ii in range(0, counter_list[jj]):
prob_list2[ii] = prob_list2[ii]/sum_list[jj] prob_list2[ii] = prob_list2[ii]/sum_list[jj]
real_prob.append(prob_list2[ii]) real_prob.append(prob_list2[ii])
del prob_list2[0:counter_list[jj]] # Normalized probabilities del prob_list2[0:counter_list[jj]] # Normalized probabilities
for vv in range(0, len(para_list)): # inserting the normalized values in para_list for vv in range(0, len(para_list)): # inserting the normalized values in para_list
para_list[vv][1] = real_prob[vv] para_list[vv][1] = real_prob[vv]
final_list = [] # List containing all the final paramters to print final_list = [] # List containing all the final paramters to print
for bb in range(0, len(sum_list)): # Insert ID in paralist for bb in range(0, len(sum_list)): # Insert ID in paralist
for ss in range(0, counter_list[bb]): for ss in range(0, counter_list[bb]):
para_list[ss][0] = id_list[bb] para_list[ss][0] = id_list[bb]
final_list.append(para_list[ss]) final_list.append(para_list[ss])
del para_list[0:counter_list[bb]] del para_list[0:counter_list[bb]]
gff = open(output_path, "w+") # gff file gff = open(output_path, "w+") # gff file
for ll in range(0, len(final_list)): # writing gff file for ll in range(0, len(final_list)): # writing gff file
gff.write(str(final_list[ll][0]) + # transcript ID gff.write(str(final_list[ll][0]) + # Transcript ID
"\tRIblast\tPriming Site\t" + "\tRIblast\tPriming Site\t" +
str(final_list[ll][2][1])+"\t" + # start str(final_list[ll][2][1])+"\t" + # Start
str(final_list[ll][2][0])+"\t" + # end str(final_list[ll][2][0])+"\t" + # End
str(final_list[ll][1])+"\t.\t.\t.\n") # probability str(final_list[ll][1])+"\t.\t.\t.\n") # probability
gff.close gff.close
return final_list return final_list
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment