Updated main and postprocessing according to Mihaela's feedback

4ac6646b · Max Bär · 5c4e1b1d · 4ac6646b · 5c4e1b1d · 4ac6646b
Commit 4ac6646b authored 2 years ago by Max Bär
--- a/primingsitepredictor/cli.py
+++ b/primingsitepredictor/cli.py
@@ -6,6 +6,7 @@ Created on Mon Nov 14 14:49:50 2022
 """
 import argparse
 import logging
+import main
 def create_parser():
    """This function creates the parser"""
@@ -30,5 +31,5 @@ if __name__ == '__main__':
        level=logging.INFO,
    )
    LOG = logging.getLogger(__name__)
-    letsgo()
+    main()
    #here we would point to the main module and parse the energy cutoff
--- a/primingsitepredictor/createtranscript.py
+++ b/primingsitepredictor/createtranscript.py
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Nov 16 17:48:04 2022
-@author: baerma
-"""
-# with open("RIBlast output example.txt", 'r') as file:
-#     content = file.readlines()
-#     print(content[3:][0].strip(' \n').split(',')[-1].strip('()').split(':'))
-#     #create a instant of each transcript class
-#     print(len(content[3:]))
-#     print((content[3:]))
-import pandas as pd
-import math
-class CreateTranscript():
-    def __init__(self):
-        with open("RIBlast output example.txt", 'r') as file:
-            self.raw_interactions = file.readlines()[3:]
-    def generate_interaction_df(self):
-        self.interaction_list = []
-        #clean up the original list so that we have a neet list
-        for i in range(0, (len(self.raw_interactions)-1)):
-            current_interaction = self.raw_interactions[i].strip(' \n').replace('(', '').replace(')','').replace('-',',').replace(':',',').split(',')
-            self.interaction_list.append(current_interaction) 
-        #identify if the interaction is the same as the previous one, just shifted by 1 bp (if we have 20 A in the transcript the 15 T primer has 5 matching possibilities although it is only 1 bindingsite)
-        previous_interaction_base = int
-        for i in range(0, len(self.interaction_list)):
-            previous_interaction_base = int(self.interaction_list[i-1][13])
-            if int(self.interaction_list[i][13]) in range(previous_interaction_base-1,previous_interaction_base-15,-1):
-                self.interaction_list[i].append('Repeat')
-            else :
-                self.interaction_list[i].append('Not_repeat')
-        #exclude all interactions which are a repeat and belong to the same bindingsite
-        self.cleaned_interaction_list = [item for item in self.interaction_list if item[-1]=='Not_repeat']
-        #add total number of interactions per transcript and calculate energy
-        self.df = pd.DataFrame(self.cleaned_interaction_list)
-        self.df['Number of interactions'] = int
-        self.df['Interaction Energy'] = float
-        energy_constant = 1.380649*10**(-23)*298
-        kcalmol_joul = 6.9477*10**-21
-        for ind in self.df.index:
-            self.df['Number of interactions'][ind]=self.df[3].value_counts()[self.df[3][ind]]
-            self.df['Interaction Energy'][ind]=math.exp(-float(self.df[5][ind])*kcalmol_joul/energy_constant)
-        print(self.df['Interaction Energy'])
-        print(self.df)
-        return self.df
-transcripts = CreateTranscript()    
-interaction_df = transcripts.generate_interaction_df()        
-#print line by line to file and then you're done
-output = str()
-for i in interaction_df.index:
-    #print(interaction_df[3][i]+'\t' + 'RIBlast' + '\t' + 'Priming_site' + '\t' + interaction_df[13][i] + '\t' + interaction_df[12][i] + '\t' + '.' + '\t' + '+' + '\t' + '.' + '\t' + f'Accessibility_Energy "{interaction_df["Interaction Energy"][i]}"')
-    output = output + str(interaction_df[3][i]+'\t' + 'RIBlast' + '\t' + 'Priming_site' + '\t' + interaction_df[13][i] + '\t' + interaction_df[12][i] + '\t' + '.' + '\t' + '+' + '\t' + '.' + '\t' + f'Accessibility_Energy "{interaction_df["Interaction Energy"][i]}"' + '\n')
-print(output)
-with open('output_transcripts_df.txt', 'w') as f:
-    f.write(output)
--- a/primingsitepredictor/main.py
+++ b/primingsitepredictor/main.py
 import sys
-#from .classmodule import MyClass
+from createprimer import CreatePrimer
-#from .funcmodule import my_function
+from postprocessing import PostProcessRIBlast
 def main():
+    """Create the RIBlast input files, then build and print the GTF output."""
-    print('in main')
+    generate_RIBlast_input()
-    args = sys.argv[1:]
+    create_gtf()
-    print('count of args :: {}'.format(len(args)))
-    for arg in args:
-        print('passed argument :: {}'.format(arg))
-    #my_function('Hello World')
+def generate_RIBlast_input():
-    #my_object = MyClass('Robin')
+    """Create the primer FASTA and return the filenames handed to RIBlast.
+
+    Returns:
+        A two-element list: the primer FASTA filename (primer name plus
+        ".fasta") followed by the fixed transcripts filename.
+    """
-    #my_object.say_name()
+    my_primer = CreatePrimer()
+    # presumably writes <name>.fasta to disk -- confirm in CreatePrimer
+    my_primer.create_fasta()
+    primer_filename = my_primer.name +".fasta"
+    transcripts_filename = "transcripts.fasta"
+    return [primer_filename, transcripts_filename]
+def create_gtf():
+    """Run the RIBlast post-processing and print the resulting GTF text."""
+    gtf_file = PostProcessRIBlast().output
+    print(gtf_file)
 if __name__ == '__main__':
    main()
--- a/primingsitepredictor/postprocessing.py
+++ b/primingsitepredictor/postprocessing.py
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Dec 20 14:06:20 2022
+@author: baerma
+"""
+import pandas as pd
+import math
+pd.options.mode.chained_assignment = None
+class PostProcessRIBlast():
+    """Convert a RIBlast result file into GTF-style priming-site lines.
+
+    Instantiating the class parses ``file`` straight away and stores the
+    generated text in ``self.output``.
+
+    Args:
+        file: Path of the RIBlast result file. When omitted, the class-level
+            default ``"RIBlast output example.txt"`` is used.
+    """
+
+    # Defaults live on the class so the parsing methods also work on an
+    # instance that did not receive an explicit path.
+    file = "RIBlast output example.txt"
+    firstline = 3   # number of leading lines skipped before the records
+    transcript = 3  # column used as transcript name (first GTF field)
+    energy = 5      # column whose value is fed to calculate_energy()
+    # Drops parentheses and turns '-' and ':' into ',' so that a single
+    # split(',') yields every field. NOTE(review): a leading minus sign is
+    # therefore split into an empty field plus the magnitude; the column
+    # indices 12/13 used in generate_gtf() presumably rely on that shift.
+    _FIELD_SEPARATORS = str.maketrans({'(': None, ')': None, '-': ',', ':': ','})
+
+    def __init__(self, file=None):
+        if file is not None:
+            self.file = file
+        self.output = self.generate_gtf()
+
+    def calculate_energy(self, value):
+        """Return ``exp(-value * kcalmol_joul / energy_constant)``.
+
+        ``energy_constant`` is the Boltzmann constant multiplied by 298
+        (presumably the temperature in kelvin); ``kcalmol_joul`` presumably
+        converts kcal/mol into joule per molecule.
+
+        Args:
+            value: Energy value; anything ``float()`` accepts.
+
+        Raises:
+            OverflowError: If the exponent is too large for ``math.exp``.
+        """
+        energy_constant = 1.380649*10**(-23)*298
+        kcalmol_joul = 6.9477*10**-21
+        return math.exp(-float(value)*kcalmol_joul/energy_constant)
+
+    def create_list_from_output(self):
+        """Read ``self.file`` and split every record into a list of fields.
+
+        The first ``self.firstline`` lines are skipped. Blank lines are
+        ignored, so a trailing empty line is harmless and the final record
+        is no longer dropped.
+
+        Returns:
+            A list with one list of string fields per record.
+        """
+        with open(self.file, 'r') as file:
+            self.raw_interactions = file.readlines()[self.firstline:]
+        self.interaction_list = [
+            line.strip(' \n').translate(self._FIELD_SEPARATORS).split(',')
+            for line in self.raw_interactions
+            if line.strip()
+        ]
+        # Number of parsed records (blank lines excluded).
+        self.number_entries = len(self.interaction_list)
+        return self.interaction_list
+
+    def create_pandas_df(self):
+        """Build the interaction table and add the derived energy columns.
+
+        Added columns:
+            Number_of_interactions: how many rows share the row's
+                transcript name.
+            Interaction_Energy: ``calculate_energy`` of the energy column.
+            Normalised_interaction_energy: the ratio of the two.
+
+        Returns:
+            The populated ``pandas.DataFrame`` (also kept as ``self.df``).
+        """
+        self.interaction_list = self.create_list_from_output()
+        self.df = pd.DataFrame(self.interaction_list)
+        derived = ('Number_of_interactions', 'Interaction_Energy',
+                   'Normalised_interaction_energy')
+        if self.df.empty:
+            # No records: keep the derived columns present but empty.
+            for column in derived:
+                self.df[column] = pd.Series(dtype=float)
+            return self.df
+        names = self.df[self.transcript]
+        # Whole-column assignment instead of df[col][index] = ...: the
+        # chained form never writes back under pandas Copy-on-Write.
+        self.df['Number_of_interactions'] = names.map(names.value_counts())
+        self.df['Interaction_Energy'] = self.df[self.energy].map(self.calculate_energy)
+        self.df['Normalised_interaction_energy'] = (
+            self.df['Interaction_Energy'] / self.df['Number_of_interactions']
+        )
+        return self.df
+
+    def generate_gtf(self):
+        """Render every interaction as one tab-separated GTF line.
+
+        Fields per line: transcript name, ``RIBlast``, ``Priming_site``,
+        column 13, column 12, ``.``, ``+``, ``.`` and the normalised
+        interaction energy as ``Interaction_Energy "<value>"``.
+
+        Returns:
+            The concatenated lines (also kept as ``self.output``); an empty
+            string when there are no records.
+        """
+        self.interaction_df = self.create_pandas_df()
+        if self.interaction_df.empty:
+            self.output = str()
+            return self.output
+        rows = zip(
+            self.interaction_df[self.transcript],
+            self.interaction_df[13],
+            self.interaction_df[12],
+            # to_numpy() keeps numpy scalars, i.e. the same text rendering
+            # as element-wise Series access.
+            self.interaction_df['Normalised_interaction_energy'].to_numpy(),
+        )
+        self.output = ''.join(
+            name + '\t' + 'RIBlast' + '\t' + 'Priming_site' + '\t' + start + '\t' + end
+            + '\t' + '.' + '\t' + '+' + '\t' + '.' + '\t'
+            + f'Interaction_Energy "{value}"' + '\n'
+            for name, start, end, value in rows
+        )
+        return self.output
+#print(PostProcessRIBlast().output)
\ No newline at end of file
--- a/primingsitepredictor/postprocessingmodule.py
+++ b/primingsitepredictor/postprocessingmodule.py
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Nov 16 16:47:20 2022
-@author: baerma
-"""
-from createtranscript import CreateTranscript
-transcripts = CreateTranscript()    
-interaction_df = transcripts.generate_interaction_df()       
-#os.chdir('C:/Users/baerma/Desktop/PhD-Local/Lectures/Programming for Life Sciences/priming-site-predictor/primingsitepredictor')
--- a/primingsitepredictor/preprocessingmodule.py
+++ b/primingsitepredictor/preprocessingmodule.py
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Nov 16 14:17:06 2022
-@author: baerma
-"""
-from createprimer import CreatePrimer
-def generate_RIBlast_input():
-    """This function creates a list of the filenames for the RIBlast"""
-    my_primer = CreatePrimer()
-    my_primer.create_fasta()
-    primer_filename = my_primer.name +".fasta"
-    transcripts_filename = "transcripts.fasta"
-    return [primer_filename, transcripts_filename]
-print(generate_RIBlast_input())