Skip to content
Snippets Groups Projects
Commit 4ac6646b authored by Max Bär's avatar Max Bär
Browse files

Updated main and postprocessing according to Mihaela's feedback

parent 5c4e1b1d
No related branches found
No related tags found
3 merge requests!43Feature max,!42Write output,!41Write output to gtf
...@@ -6,6 +6,7 @@ Created on Mon Nov 14 14:49:50 2022 ...@@ -6,6 +6,7 @@ Created on Mon Nov 14 14:49:50 2022
""" """
import argparse import argparse
import logging import logging
import main
def create_parser(): def create_parser():
"""This function creates the parser""" """This function creates the parser"""
...@@ -30,5 +31,5 @@ if __name__ == '__main__': ...@@ -30,5 +31,5 @@ if __name__ == '__main__':
level=logging.INFO, level=logging.INFO,
) )
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
letsgo() main()
#here we would point to the main module and parse the energy cutoff #here we would point to the main module and parse the energy cutoff
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 16 17:48:04 2022
@author: baerma
"""
# with open("RIBlast output example.txt", 'r') as file:
# content = file.readlines()
# print(content[3:][0].strip(' \n').split(',')[-1].strip('()').split(':'))
# #create a instant of each transcript class
# print(len(content[3:]))
# print((content[3:]))
import pandas as pd
import math
class CreateTranscript():
    """Parse RIBlast output into a table of non-repeated interactions.

    The raw text lines are read once in ``__init__``;
    ``generate_interaction_df`` turns them into a pandas DataFrame.
    """

    def __init__(self, filename="RIBlast output example.txt"):
        """Read the RIBlast output file, skipping its first three lines.

        Args:
            filename: Path of the RIBlast output file. Defaults to the
                example file name that was previously hard-coded.
        """
        with open(filename, 'r') as file:
            self.raw_interactions = file.readlines()[3:]

    def generate_interaction_df(self):
        """Build the DataFrame of interactions that are not repeats.

        Each raw line is split into fields. An interaction is tagged
        ``'Repeat'`` when its field 13 lies 1 to 14 positions below field 13
        of the interaction directly before it; repeats are dropped. Two
        columns are then added: ``'Number of interactions'`` (how many
        remaining rows share the same field 3) and ``'Interaction Energy'``
        (``exp(-E * kcalmol_joul / energy_constant)`` with E taken from
        field 5).

        Returns:
            pandas.DataFrame: one row per retained interaction. The frame is
            also stored as ``self.df``.
        """
        # Remove the parentheses and turn '-' and ':' into commas so that one
        # split(',') yields every field.
        # NOTE(review): this also splits negative numbers and any name that
        # contains '-' or ':'; the column indices used below rely on that
        # layout -- confirm against a real RIBlast file.
        # NOTE(review): the last line is left out, exactly as before
        # (presumably a trailing blank line) -- confirm it is never a record.
        self.interaction_list = [
            line.strip(' \n')
                .replace('(', '')
                .replace(')', '')
                .replace('-', ',')
                .replace(':', ',')
                .split(',')
            for line in self.raw_interactions[:-1]
        ]

        # Tag hits that are the previous hit shifted by a few bases (e.g. a
        # 15 T primer on a 20 A stretch matches several times although it is
        # a single binding site). The first interaction has no predecessor,
        # so it is never a repeat; previously it was compared with the LAST
        # interaction through index -1.
        # NOTE(review): the transcript name (field 3) is not compared, so
        # neighbouring hits on different transcripts can be tagged as repeats.
        previous_base = None
        for interaction in self.interaction_list:
            current_base = int(interaction[13])
            is_repeat = (
                previous_base is not None
                and previous_base - 14 <= current_base <= previous_base - 1
            )
            interaction.append('Repeat' if is_repeat else 'Not_repeat')
            previous_base = current_base

        # Keep only one entry per binding site.
        self.cleaned_interaction_list = [
            item for item in self.interaction_list if item[-1] == 'Not_repeat'
        ]

        self.df = pd.DataFrame(self.cleaned_interaction_list)
        if self.df.empty:
            # No interactions: return an empty frame that still carries the
            # two derived columns instead of failing on a missing column.
            self.df['Number of interactions'] = pd.Series(dtype=int)
            self.df['Interaction Energy'] = pd.Series(dtype=float)
        else:
            energy_constant = 1.380649*10**(-23)*298
            kcalmol_joul = 6.9477*10**-21
            transcript_names = self.df[3]
            # Whole-column assignment: element-wise ``df[col][row] = ...`` is
            # chained assignment and does not reliably write to the frame.
            self.df['Number of interactions'] = transcript_names.map(
                transcript_names.value_counts()
            )
            self.df['Interaction Energy'] = self.df[5].map(
                lambda value: math.exp(
                    -float(value)*kcalmol_joul/energy_constant
                )
            )

        print(self.df['Interaction Energy'])
        print(self.df)
        return self.df
transcripts = CreateTranscript()
interaction_df = transcripts.generate_interaction_df()

# Render one tab-separated, GTF-style record per interaction: field 3 as the
# sequence name, fields 13 and 12 as start and end, and the computed
# "Interaction Energy" as the attribute. The records are collected in a list
# and joined once instead of growing a string inside the loop.
gtf_records = []
for i in interaction_df.index:
    gtf_records.append(
        interaction_df[3][i] + '\t' + 'RIBlast' + '\t' + 'Priming_site' + '\t'
        + interaction_df[13][i] + '\t' + interaction_df[12][i] + '\t'
        + '.' + '\t' + '+' + '\t' + '.' + '\t'
        + f'Accessibility_Energy "{interaction_df["Interaction Energy"][i]}"'
        + '\n'
    )
output = ''.join(gtf_records)

print(output)
with open('output_transcripts_df.txt', 'w') as f:
    f.write(output)
import sys import sys
#from .classmodule import MyClass from createprimer import CreatePrimer
#from .funcmodule import my_function from postprocessing import PostProcessRIBlast
def main(): def main():
print('in main') generate_RIBlast_input()
args = sys.argv[1:] create_gtf()
print('count of args :: {}'.format(len(args)))
for arg in args:
print('passed argument :: {}'.format(arg))
#my_function('Hello World') def generate_RIBlast_input():
#my_object = MyClass('Robin') """This function creates a list of the filenames for the RIBlast"""
#my_object.say_name() my_primer = CreatePrimer()
my_primer.create_fasta()
primer_filename = my_primer.name +".fasta"
transcripts_filename = "transcripts.fasta"
return [primer_filename, transcripts_filename]
def create_gtf():
    """Instantiate PostProcessRIBlast and print its ``output`` attribute."""
    print(PostProcessRIBlast().output)
main()
if __name__ == '__main__': if __name__ == '__main__':
main() main()
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 20 14:06:20 2022
@author: baerma
"""
import pandas as pd
import math
# Turn off pandas' chained-assignment (SettingWithCopy) warning process-wide.
pd.options.mode.chained_assignment = None
class PostProcessRIBlast():
    """Convert a RIBlast output file into GTF-style priming-site records.

    Creating an instance runs the whole pipeline (read file -> DataFrame ->
    GTF text). The finished text is available as ``self.output``.
    """

    def __init__(self, file="RIBlast output example.txt", firstline=3):
        """Parse *file* and build the GTF text.

        Args:
            file: Path of the RIBlast output file. Defaults to the example
                file name that was previously hard-coded.
            firstline: Number of leading lines to skip before the interaction
                records start.
        """
        self.file = file
        self.firstline = firstline
        self.generate_gtf()

    def calculate_energy(self, value):
        """Return ``exp(-value * kcalmol_joul / energy_constant)``.

        ``energy_constant`` is 1.380649e-23 * 298 and ``kcalmol_joul`` is
        6.9477e-21; *value* is presumably an energy in kcal/mol -- confirm
        against the RIBlast column it is read from.

        Args:
            value: Energy as a number or numeric string.

        Returns:
            float: the exponential weight.
        """
        energy_constant = 1.380649*10**(-23)*298
        kcalmol_joul = 6.9477*10**-21
        return math.exp(-float(value)*kcalmol_joul/energy_constant)

    @staticmethod
    def _split_record(line):
        """Split one raw RIBlast line into a flat list of string fields."""
        # Parentheses are removed and '-' / ':' become commas so that a
        # single split(',') yields every field.
        # NOTE(review): this also splits negative numbers and any name that
        # contains '-' or ':'; the column indices used elsewhere rely on that
        # layout -- confirm against a real RIBlast file.
        return (
            line.strip(' \n')
                .replace('(', '')
                .replace(')', '')
                .replace('-', ',')
                .replace(':', ',')
                .split(',')
        )

    def create_list_from_output(self):
        """Read ``self.file`` and return its records as lists of fields.

        The first ``self.firstline`` lines are skipped.

        Returns:
            list[list[str]]: one field list per interaction line.
        """
        with open(self.file, 'r') as file:
            self.raw_interactions = file.readlines()[self.firstline:]
        self.number_entries = len(self.raw_interactions)
        # NOTE(review): the last line is left out, exactly as before
        # (presumably a trailing blank line) -- confirm it is never a record.
        self.interaction_list = [
            self._split_record(line) for line in self.raw_interactions[:-1]
        ]
        return self.interaction_list

    def create_pandas_df(self):
        """Build the interaction DataFrame with the derived energy columns.

        Added columns:
            ``Number_of_interactions``: rows sharing the same field 3.
            ``Interaction_Energy``: ``calculate_energy`` applied to field 5.
            ``Normalised_interaction_energy``: energy divided by the count.

        Returns:
            pandas.DataFrame: also stored as ``self.df``.
        """
        self.interaction_list = self.create_list_from_output()
        self.df = pd.DataFrame(self.interaction_list)
        self.transcript = 3
        self.energy = 5

        if self.df.empty:
            # No records: keep the expected columns instead of failing on a
            # missing column label.
            self.df['Number_of_interactions'] = pd.Series(dtype=int)
            self.df['Interaction_Energy'] = pd.Series(dtype=float)
            self.df['Normalised_interaction_energy'] = pd.Series(dtype=float)
            return self.df

        transcript_names = self.df[self.transcript]
        # Whole-column assignment: element-wise ``df[col][row] = ...`` is
        # chained assignment and does not reliably write to the frame. It
        # also recomputed value_counts() for every row.
        self.df['Number_of_interactions'] = transcript_names.map(
            transcript_names.value_counts()
        )
        self.df['Interaction_Energy'] = self.df[self.energy].map(
            self.calculate_energy
        )
        self.df['Normalised_interaction_energy'] = (
            self.df['Interaction_Energy'] / self.df['Number_of_interactions']
        )
        return self.df

    def generate_gtf(self):
        """Render the interaction table as tab-separated GTF-style text.

        Each record uses field 3 as the sequence name, fields 13 and 12 as
        start and end, and the normalised energy as the attribute.

        Returns:
            str: the records joined together, each ending in a newline. The
            text is also stored as ``self.output``.
        """
        self.interaction_df = self.create_pandas_df()
        if self.interaction_df.empty:
            self.output = str()
            return self.output

        table = self.interaction_df
        # Join once instead of growing the string inside a loop.
        self.output = ''.join(
            f'{name}\tRIBlast\tPriming_site\t{start}\t{end}\t.\t+\t.\t'
            f'Interaction_Energy "{energy}"\n'
            for name, start, end, energy in zip(
                table[3],
                table[13],
                table[12],
                table['Normalised_interaction_energy'],
            )
        )
        return self.output
#print(PostProcessRIBlast().output)
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 16 16:47:20 2022
@author: baerma
"""
from createtranscript import CreateTranscript
# Module-level statements: these run as soon as the module is executed or
# imported, and build the interaction DataFrame from a CreateTranscript.
transcripts = CreateTranscript()
interaction_df = transcripts.generate_interaction_df()
#os.chdir('C:/Users/baerma/Desktop/PhD-Local/Lectures/Programming for Life Sciences/priming-site-predictor/primingsitepredictor')
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 16 14:17:06 2022
@author: baerma
"""
from createprimer import CreatePrimer
def generate_RIBlast_input():
    """Create a primer and return the two input filenames for RIBlast.

    A ``CreatePrimer`` object is instantiated and its ``create_fasta()``
    method is called before the names are assembled.

    Returns:
        list: the primer's ``name`` with ".fasta" appended, followed by the
        fixed name "transcripts.fasta".
    """
    primer = CreatePrimer()
    primer.create_fasta()
    return [primer.name +".fasta", "transcripts.fasta"]
# Module-level call: runs generate_RIBlast_input() and prints the returned
# list of filenames whenever this module is executed or imported.
print(generate_RIBlast_input())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment