diff --git a/primingsitepredictor/Primer1.fasta b/primingsitepredictor/Primer1.fasta index 61ea48b62158516ac0c24d630eb7796c7a745485..545f1100114ec5eb992d22bc71d364b8663a7638 100644 --- a/primingsitepredictor/Primer1.fasta +++ b/primingsitepredictor/Primer1.fasta @@ -1,2 +1,2 @@ <primer1 -TTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTT diff --git a/primingsitepredictor/cli.py b/primingsitepredictor/cli.py index 518fea18a365aae2b8aa4eac5d264966adba984e..5b665962b5b9b5aa698793c7fe4e3d4cba2db16f 100644 --- a/primingsitepredictor/cli.py +++ b/primingsitepredictor/cli.py @@ -6,6 +6,7 @@ Created on Mon Nov 14 14:49:50 2022 """ import argparse import logging +import main def create_parser(): """This function creates the parser""" @@ -30,5 +31,5 @@ if __name__ == '__main__': level=logging.INFO, ) LOG = logging.getLogger(__name__) - letsgo() + main() #here we would point to the main module and parse the energy cutoff diff --git a/primingsitepredictor/createtranscript.py b/primingsitepredictor/createtranscript.py deleted file mode 100644 index dddeb1fc4374b5e17da87e30c395b91340da2d87..0000000000000000000000000000000000000000 --- a/primingsitepredictor/createtranscript.py +++ /dev/null @@ -1,80 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Wed Nov 16 17:48:04 2022 - -@author: baerma -""" - -# with open("RIBlast output example.txt", 'r') as file: -# content = file.readlines() -# print(content[3:][0].strip(' \n').split(',')[-1].strip('()').split(':')) -# #create a instant of each transcript class -# print(len(content[3:])) -# print((content[3:])) - -import pandas as pd -import math - -class CreateTranscript(): - def __init__(self): - with open("RIBlast output example.txt", 'r') as file: - self.raw_interactions = file.readlines()[3:] - - - - def generate_interaction_df(self): - - self.interaction_list = [] - - #clean up the original list so that we have a neet list - for i in range(0, (len(self.raw_interactions)-1)): - current_interaction = self.raw_interactions[i].strip(' 
\n').replace('(', '').replace(')','').replace('-',',').replace(':',',').split(',') - self.interaction_list.append(current_interaction) - - #identify if the interaction is the same as the previous one, just shifted by 1 bp (if we have 20 A in the transcript the 15 T primer has 5 matching possibilities although it is only 1 bindingsite) - previous_interaction_base = int - for i in range(0, len(self.interaction_list)): - previous_interaction_base = int(self.interaction_list[i-1][13]) - if int(self.interaction_list[i][13]) in range(previous_interaction_base-1,previous_interaction_base-15,-1): - self.interaction_list[i].append('Repeat') - else : - self.interaction_list[i].append('Not_repeat') - - #exclude all interactions which are a repeat and belong to the same bindingsite - self.cleaned_interaction_list = [item for item in self.interaction_list if item[-1]=='Not_repeat'] - - - #add total number of interactions per transcript and calculate energy - self.df = pd.DataFrame(self.cleaned_interaction_list) - self.df['Number of interactions'] = int - self.df['Interaction Energy'] = float - - energy_constant = 1.380649*10**(-23)*298 - kcalmol_joul = 6.9477*10**-21 - - - for ind in self.df.index: - self.df['Number of interactions'][ind]=self.df[3].value_counts()[self.df[3][ind]] - self.df['Interaction Energy'][ind]=math.exp(-float(self.df[5][ind])*kcalmol_joul/energy_constant) - print(self.df['Interaction Energy']) - print(self.df) - - - return self.df - - - -transcripts = CreateTranscript() -interaction_df = transcripts.generate_interaction_df() - -#print line by line to file and then you're done - -output = str() -for i in interaction_df.index: - #print(interaction_df[3][i]+'\t' + 'RIBlast' + '\t' + 'Priming_site' + '\t' + interaction_df[13][i] + '\t' + interaction_df[12][i] + '\t' + '.' + '\t' + '+' + '\t' + '.' 
def main():
    """Run the pipeline: build the RIBlast input file names, then print the GTF text."""
    generate_RIBlast_input()
    create_gtf()


def generate_RIBlast_input():
    """Create the primer fasta and return the file names meant for RIBlast.

    Returns:
        A two-element list: the primer file name (``<primer name>.fasta``)
        followed by the fixed transcripts file name ``"transcripts.fasta"``.
    """
    my_primer = CreatePrimer()
    # NOTE(review): create_fasta() is presumably what writes <name>.fasta to
    # disk; its implementation lives in createprimer and is not checked here.
    my_primer.create_fasta()
    primer_filename = my_primer.name + ".fasta"
    transcripts_filename = "transcripts.fasta"

    return [primer_filename, transcripts_filename]


def create_gtf():
    """Post-process the RIBlast result and print the resulting GTF text."""
    gtf_file = PostProcessRIBlast().output
    print(gtf_file)


# The pipeline is started only when this file is executed as a script.
# A second, unguarded main() call used to sit above this guard, which ran the
# whole pipeline on every import and twice on direct execution.
if __name__ == '__main__':
    main()
class PostProcessRIBlast():
    """Convert a RIBlast result file into GTF-style priming-site lines.

    On construction the file is read, every record is split into positional
    fields, a weight ``exp(-E * c)`` is computed from field 5 of each record,
    the weight is divided by the number of records sharing the same value in
    field 3, and one tab-separated line per record is stored in
    ``self.output``.

    Args:
        file: Path of the RIBlast result file. Defaults to the example file
            name that used to be hard-coded.
        firstline: Number of leading lines to skip before the records start.
    """

    # Single-pass equivalent of removing '(' and ')' and turning '-' and ':'
    # into field separators. Because '-' becomes a separator, a negative
    # number such as "-9.7" yields an empty field followed by "9.7"; the
    # column positions used below (3, 5, 12, 13) rely on that layout.
    _FIELD_TABLE = str.maketrans({'(': None, ')': None, '-': ',', ':': ','})

    def __init__(self, file="RIBlast output example.txt", firstline=3):
        self.file = file
        self.firstline = firstline
        self.output = self.generate_gtf()

    def calculate_energy(self, value):
        """Return ``exp(-value * kcalmol_joul / energy_constant)``.

        Args:
            value: Energy as a number or numeric string (converted with
                ``float``).

        Raises:
            ValueError: If ``value`` cannot be converted to ``float``.
        """
        # 1.380649e-23 is the Boltzmann constant in J/K; 298 is presumably
        # the temperature in kelvin -- TODO confirm.
        energy_constant = 1.380649*10**(-23)*298
        # Presumably the kcal/mol -> joule-per-molecule factor (per its name).
        kcalmol_joul = 6.9477*10**-21
        return math.exp(-float(value)*kcalmol_joul/energy_constant)

    def create_list_from_output(self):
        """Read the result file and split every record into a list of fields.

        The first ``self.firstline`` lines are skipped. Blank lines are
        ignored, so a trailing empty line does not create a bogus record and
        the final record is no longer dropped.

        Returns:
            A list with one list of string fields per record.
        """
        with open(self.file, 'r') as handle:
            self.raw_interactions = handle.readlines()[self.firstline:]
        self.number_entries = len(self.raw_interactions)

        self.interaction_list = [
            line.strip(' \n').translate(self._FIELD_TABLE).split(',')
            for line in self.raw_interactions
            if line.strip()
        ]
        return self.interaction_list

    def create_pandas_df(self):
        """Build the interaction table and add the derived columns.

        Added columns:
            ``Number_of_interactions``: how many records share this record's
            value in column ``self.transcript`` (3).
            ``Interaction_Energy``: ``calculate_energy`` applied to column
            ``self.energy`` (5).
            ``Normalised_interaction_energy``: the ratio of the two.

        Returns:
            The populated ``pandas.DataFrame`` (also kept as ``self.df``).
        """
        self.interaction_list = self.create_list_from_output()
        self.df = pd.DataFrame(self.interaction_list)
        self.transcript = 3
        self.energy = 5

        if self.df.empty:
            # No records: keep the derived columns present but empty.
            self.df['Number_of_interactions'] = pd.Series(dtype='int64')
            self.df['Interaction_Energy'] = pd.Series(dtype='float64')
        else:
            # Whole-column assignment instead of per-cell chained indexing,
            # which does not write through under pandas Copy-on-Write.
            targets = self.df[self.transcript]
            self.df['Number_of_interactions'] = targets.map(targets.value_counts())
            self.df['Interaction_Energy'] = [
                self.calculate_energy(value) for value in self.df[self.energy]
            ]

        self.df['Normalised_interaction_energy'] = (
            self.df['Interaction_Energy'] / self.df['Number_of_interactions']
        )
        return self.df

    def generate_gtf(self):
        """Render one tab-separated GTF-style line per interaction record.

        Each line holds: column 3, ``RIBlast``, ``Priming_site``, column 13,
        column 12, ``.``, ``+``, ``.`` and an ``Interaction_Energy "<value>"``
        attribute carrying the normalised weight.

        Returns:
            The concatenated lines (also kept as ``self.output``); an empty
            string when there are no records.
        """
        self.interaction_df = self.create_pandas_df()
        frame = self.interaction_df
        if frame.empty:
            self.output = str()
            return self.output

        rows = zip(frame[3], frame[13], frame[12],
                   frame['Normalised_interaction_energy'])
        # str.join raises TypeError on a missing (non-string) field, just as
        # the former '+' concatenation did, so malformed records still fail
        # loudly instead of being written out.
        self.output = ''.join(
            '\t'.join((name, 'RIBlast', 'Priming_site', start, end,
                       '.', '+', '.', f'Interaction_Energy "{weight}"')) + '\n'
            for name, start, end, weight in rows
        )
        return self.output