Skip to content
Snippets Groups Projects
Commit abf6458e authored by Melvin Alappat's avatar Melvin Alappat
Browse files

deleting old file

parent 7ae6757a
Branches
No related tags found
1 merge request!16Issue_4
Pipeline #13867 failed
"""Imports."""
import numpy as np
import scipy.constants
import argparse
from pathlib import Path
class Probability:
"""Calculate priming probabilities and write them to a GFF-style text file.

NOTE(review): the indentation of this file has been lost. The statements
that follow (argument parsing, InterPara, InterProb and the calls to them)
presumably form the class body and would then run when the class is
defined -- confirm against the original file.
"""
# Command-line parser. add_help=False means argparse does not register the
# -h/--help option for this parser.
parser = argparse.ArgumentParser(
description="Fasta-file input",
add_help=False,
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
# Single positional argument, converted to an absolute pathlib.Path.
# NOTE(review): the help text says "fasta-file", but the parsed value is
# passed to InterPara, whose docstring describes a RIblast output file --
# confirm which one is intended.
parser.add_argument(
'input_file',
type=lambda p: Path(p).absolute(),
metavar="PATH",
help="path to fasta-file",
)
# The command line is parsed as soon as this statement executes.
args = parser.parse_args()
def InterPara(path):
    """Read a RIblast output file and return only its result lines.

    A line is kept when its first character is a digit, i.e. when it starts
    with a numeric record ID; header lines and blank lines are skipped.

    Args:
        path: Path of the RIblast output file to read.

    Returns:
        list: The kept lines in file order, each still carrying its
        line ending (if it had one in the file).
    """
    # The context manager guarantees the handle is released even when
    # reading fails part-way through the file.
    with open(path, "r") as riblast_output:
        # Lines yielded by file iteration are never empty, so line[0] is safe.
        return [line for line in riblast_output if line[0].isdigit()]
# Collect the digit-prefixed lines of the file named on the command line.
data = InterPara(args.input_file)
def InterProb(data_list,
              transcript_path="../inputs/transcript.fasta",
              output_path="../inputs/Potential_Priming_sites.txt"):
    """Convert RIblast interaction energies to probabilities and write them.

    For every record the interaction energy E (second-to-last comma-separated
    field, converted from kcal/mol to J/mol) is turned into the weight
    exp(-E / (R * 300.15)). The weights are then normalised per transcript,
    so the probabilities belonging to one transcript sum to 1. One
    tab-separated line per record is written to ``output_path``:

        <transcript id> RIblast transcript <site[1]> <site[0]> <prob> . . .

    where ``site`` is the part of the last field after the ":" split on "-".

    Records are assigned to transcripts by position: walking the transcript
    IDs in FASTA order, each transcript takes the next ``n`` records, ``n``
    being the number of records whose text contains that ID. ``data_list``
    therefore has to be grouped by transcript, in FASTA order.

    Args:
        data_list (list): Raw RIblast result lines (comma-separated strings).
        transcript_path: FASTA file whose ">" header lines supply the
            transcript IDs. Defaults to the previously hard-coded location.
        output_path: File to (over)write with the result lines. Defaults to
            the previously hard-coded location.

    Returns:
        The file object that was used for writing; it is already closed.

    Raises:
        IndexError: If a record has fewer than three fields or no ":" in its
            last field, or if the per-transcript hit counts do not add up to
            ``len(data_list)`` (kept as IndexError, the type the positional
            bookkeeping raised before).
        ValueError: If an energy field is not a number.
    """
    joule_per_kcal = 4184
    rt = scipy.constants.R * 300.15  # gas constant times temperature in K

    # Transcript IDs: header lines with every ">" and newline removed.
    with open(transcript_path, "r") as fasta:
        transcript_ids = [
            line.replace(">", "").replace("\n", "")
            for line in fasta
            if line.startswith(">")
        ]

    # Number of records mentioning each transcript ID (substring match).
    hits_per_transcript = [
        sum(1 for record in data_list if transcript_id in record)
        for transcript_id in transcript_ids
    ]

    # Parse every record into (Boltzmann weight, interaction site).
    records = []
    for line in data_list:
        fields = line.split(",")
        if len(fields) < 3:
            raise IndexError(
                "malformed RIblast record (need at least 3 comma-separated "
                "fields): {!r}".format(line)
            )
        # "(a-b:c-d) \n" -> "c-d) \n" -> ["c", "d"]
        site = fields[-1].split(":")[1]
        site = (
            site.replace(") ", "").replace("\n", "").replace("-", " ").split(" ")
        )
        energy = float(fields[-2]) * joule_per_kcal
        # NOTE(review): np.exp overflows to inf for exponents above ~709,
        # which would turn the normalised values into nan.
        records.append((np.exp(-energy / rt), site))

    # The positional grouping below only makes sense when every record was
    # counted exactly once; fail early and before touching the output file.
    if sum(hits_per_transcript) != len(records):
        raise IndexError(
            "transcript hit counts ({}) do not match the number of RIblast "
            "records ({})".format(sum(hits_per_transcript), len(records))
        )

    # Normalise the weights inside each consecutive per-transcript chunk.
    rows = []
    start = 0
    for transcript_id, hits in zip(transcript_ids, hits_per_transcript):
        chunk = records[start:start + hits]
        total = sum(weight for weight, _ in chunk)
        for weight, site in chunk:
            rows.append((transcript_id, site, weight / total))
        start += hits

    # The context manager flushes and closes the file; the original
    # `gff.close` (without parentheses) never did.
    with open(output_path, "w+") as gff:
        for transcript_id, site, probability in rows:
            gff.write(
                "\t".join([
                    str(transcript_id),
                    "RIblast",
                    "transcript",
                    str(site[1]),
                    str(site[0]),
                    str(probability),
                    ".",
                    ".",
                    ".",
                ]) + "\n"
            )
    return gff
# Compute the probabilities for the collected lines and write the output
# file; the file object returned by InterProb is discarded.
InterProb(data)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment