Skip to content
Snippets Groups Projects

Issue_4

Closed Melvin Alappat requested to merge Issue_4 into main
Compare and
2 files
+ 199
0
Compare changes
  • Side-by-side
  • Inline
Files
2
+ 173
0
 
"""Imports."""
 
import numpy as np
 
import scipy.constants
 
import argparse
 
from pathlib import Path
 
 
 
class Probability:
    """Compute priming probabilities and write them to a GFF file."""

    # NOTE(review): indentation was lost in this copy of the file, so it is
    # unclear whether the statements and functions that follow were
    # originally part of this class body -- confirm against the repository.
 
 
# Command-line interface: a single positional argument naming the input file.
# Automatic -h/--help is switched off via add_help=False.
parser = argparse.ArgumentParser(
    add_help=False,
    description="Fasta-file input",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

# The given path is converted to an absolute pathlib.Path while parsing.
parser.add_argument(
    "input_file",
    help="path to fasta-file",
    metavar="PATH",
    type=lambda raw: Path(raw).absolute(),
)

# Parsing runs as soon as this statement executes and reads sys.argv.
args = parser.parse_args()
 
 
def InterPara(path):
    """Read an RIblast output file and keep only its parameter lines.

    A parameter line is any line whose first character is a digit (the
    interaction ID); every other line is skipped.

    Args:
        path: Path of the RIblast output file (str or path-like).

    Returns:
        list: The selected lines in file order, unmodified (a trailing
        newline, if present, is kept).
    """
    # The context manager closes the file even if reading raises.
    with open(path, "r") as handle:
        return [line for line in handle if line[:1].isdigit()]
 
 
# Load the parameter lines from the file named on the command line.
data = InterPara(args.input_file)
 
 
def InterProb(
    data_list,
    transcript_path="../inputs/transcript.fasta",
    output_path="../inputs/Potential_Priming_sites.txt",
):
    """Convert RIblast interaction energies to probabilities and write them.

    For every row the second-to-last comma-separated field is read as an
    energy E in kcal/mol and turned into the weight exp(-E / (R * T)) with
    T = 300.15 K. Weights are normalised so that the rows belonging to one
    transcript sum to 1, and one tab-separated line per row is written to
    ``output_path``.

    Rows are attributed to transcripts by position: the first n1 rows go
    to the first FASTA record, the next n2 rows to the second, and so on,
    where n_i is the number of rows whose text contains the i-th ID.
    NOTE(review): this presumes the rows are grouped in FASTA order and
    that an ID never occurs as a substring of an unrelated row -- confirm
    against the RIblast output used in practice.

    Args:
        data_list (list): RIblast parameter lines (comma-separated text).
        transcript_path: FASTA file whose header lines supply the
            transcript IDs (header text without ">" and the newline).
        output_path: File to (over)write with the result lines.

    Returns:
        file: The file object that was used for writing; it is already
        closed when returned.

    Raises:
        ValueError: If a row has fewer than three fields, or if the
            per-transcript hit counts do not add up to the number of rows.
        IndexError: If the last field of a row contains no ":" or fewer
            than two position tokens after it.
    """
    joule_per_kcal = 4184
    temperature_kelvin = 300.15

    with open(transcript_path, "r") as fasta:
        transcript_ids = [
            line.replace(">", "").replace("\n", "")
            for line in fasta
            if line.startswith(">")
        ]

    # How many rows mention each transcript ID (plain substring test).
    hits_per_transcript = [
        sum(1 for row in data_list if seq_id in row)
        for seq_id in transcript_ids
    ]
    if sum(hits_per_transcript) != len(data_list):
        raise ValueError(
            "cannot assign rows to transcripts: "
            f"{sum(hits_per_transcript)} ID matches for "
            f"{len(data_list)} rows"
        )

    gas_constant_times_t = scipy.constants.R * temperature_kelvin
    weights = []
    sites = []
    for row in data_list:
        fields = row.split(",")
        if len(fields) < 3:
            raise ValueError(f"malformed RIblast row: {row!r}")
        energy_joule = float(fields[-2]) * joule_per_kcal
        weights.append(np.exp(-energy_joule / gas_constant_times_t))
        sites.append(_parse_site(fields[-1]))

    # Build every output line first so that a malformed row cannot leave a
    # half-written file behind.
    out_lines = []
    start = 0
    for seq_id, n_hits in zip(transcript_ids, hits_per_transcript):
        stop = start + n_hits
        total = sum(weights[start:stop])
        for weight, site in zip(weights[start:stop], sites[start:stop]):
            out_lines.append(
                str(seq_id)
                + "\tRIblast\ttranscript\t"
                + str(site[1]) + "\t"
                + str(site[0]) + "\t"
                + str(weight / total) + "\t.\t.\t.\n"
            )
        start = stop

    with open(output_path, "w+") as gff:
        gff.writelines(out_lines)

    return gff


def _parse_site(field):
    """Return the position tokens that follow ':' in a base-pair field."""
    after_colon = field.split(":")[1]
    cleaned = after_colon.replace(") ", "").replace("\n", "")
    return cleaned.replace("-", " ").split(" ")
 
 
# Run the probability calculation on the loaded lines; this writes the output file.
InterProb(data)
Loading