"""Post-process RIBlast output into a table of distinct primer binding sites."""
import math

import pandas as pd


class CreateTranscript():
    """Parse RIBlast interaction lines and summarise them in a DataFrame.

    Attributes set by the methods:
        raw_interactions: raw text lines of the input file, without the
            first three lines (presumably the RIBlast header - TODO confirm).
        interaction_list: one list of string fields per parsed line, with a
            trailing ``'Repeat'`` / ``'Not_repeat'`` marker.
        cleaned_interaction_list: the ``'Not_repeat'`` rows only.
        df: the DataFrame returned by :meth:`generate_interaction_df`.
    """

    # Characters rewritten before splitting a line on ',': brackets are
    # dropped, '-' and ':' become field separators.
    # NOTE(review): '-' is replaced everywhere, so a negative number such as
    # "-9.76" becomes an empty field followed by "9.76". The positional
    # indices used below (3, 5, 13) rely on that layout - confirm before
    # changing the parsing.
    _SEPARATORS = str.maketrans({'(': None, ')': None, '-': ',', ':': ','})

    # Positional fields read from each split line.
    _TARGET_FIELD = 3      # grouped on for 'Number of interactions'
    _ENERGY_FIELD = 5      # parsed as float for 'Interaction Energy'
    _POSITION_FIELD = 13   # parsed as int for the repeat detection

    # A row is a repeat when its position is 1..14 below the previous row's.
    _MAX_SHIFT = 14

    def __init__(self, path="RIBlast output example.txt"):
        """Read the interaction lines from *path*.

        Args:
            path: RIBlast output file; defaults to the example file that was
                previously hard-coded.

        Raises:
            OSError: if the file cannot be opened.
        """
        with open(path, 'r', encoding='utf-8') as file:
            self.raw_interactions = file.readlines()[3:]

    def generate_interaction_df(self):
        """Build the table of non-repeated interactions.

        Every non-blank raw line is split into fields, consecutive rows that
        only differ by a small position shift are marked as repeats and
        dropped, and two columns are added to the remaining rows:

        * ``'Number of interactions'``: how many remaining rows share the
          same value in column 3.
        * ``'Interaction Energy'``: ``exp(-E * kcalmol_joul / energy_constant)``
          with ``E`` the float value of column 5.

        Returns:
            pandas.DataFrame: positional string columns ``0..14`` (column 14
            is the ``'Not_repeat'`` marker) plus the two named columns. With
            no interactions, an empty frame holding only the two named
            columns. The same object is stored in ``self.df``.

        Raises:
            IndexError: if a non-blank line has fewer than 14 fields.
            ValueError: if field 13 is not an integer or field 5 not a float.
        """
        # Parse every non-blank line (the last line is no longer dropped).
        self.interaction_list = [
            self._split_fields(line)
            for line in self.raw_interactions
            if line.strip()
        ]

        self._flag_repeats(self.interaction_list)

        # Exclude all interactions which are a repeat and therefore belong
        # to the same binding site as the row before them.
        self.cleaned_interaction_list = [
            item for item in self.interaction_list if item[-1] == 'Not_repeat'
        ]

        self.df = self._build_frame(self.cleaned_interaction_list)
        return self.df

    @classmethod
    def _split_fields(cls, line):
        """Split one raw line into its list of string fields."""
        return line.strip(' \n').translate(cls._SEPARATORS).split(',')

    @classmethod
    def _flag_repeats(cls, rows):
        """Append ``'Repeat'`` or ``'Not_repeat'`` to every row in place.

        An interaction counts as a repeat when it is the previous one shifted
        by a few bases: with 20 A in the transcript, a 15 T primer has
        several matching positions although it is only one binding site.
        The first row has no predecessor and is never a repeat.

        NOTE(review): only the position is compared; the target name in
        column 3 is not checked - confirm this is intended.
        """
        previous_base = None
        for row in rows:
            base = int(row[cls._POSITION_FIELD])
            is_repeat = (
                previous_base is not None
                and 1 <= previous_base - base <= cls._MAX_SHIFT
            )
            row.append('Repeat' if is_repeat else 'Not_repeat')
            previous_base = base

    @classmethod
    def _build_frame(cls, rows):
        """Turn the cleaned rows into a DataFrame with the derived columns."""
        if not rows:
            return pd.DataFrame(
                columns=['Number of interactions', 'Interaction Energy']
            )

        frame = pd.DataFrame(rows)

        # Boltzmann constant [J/K] times 298 (presumably the temperature in K).
        energy_constant = 1.380649*10**(-23)*298
        # 1 kcal/mol expressed in joule per molecule.
        kcalmol_joul = 6.9477*10**-21

        # Whole-column assignments: no chained indexing, counts computed once.
        targets = frame[cls._TARGET_FIELD]
        frame['Number of interactions'] = targets.map(targets.value_counts())
        frame['Interaction Energy'] = [
            math.exp(-float(value)*kcalmol_joul/energy_constant)
            for value in frame[cls._ENERGY_FIELD]
        ]
        return frame


if __name__ == "__main__":
    # Script entry point; guarded so importing the module does no file I/O.
    transcripts = CreateTranscript()
    interaction_df = transcripts.generate_interaction_df()