Skip to content
Snippets Groups Projects
Commit 4edd965f authored by Max Bär's avatar Max Bär
Browse files

First attempt on postprocess

parent 97e28760
No related branches found
No related tags found
1 merge request: !24 "First attempt on postprocess"
......@@ -12,27 +12,57 @@ Created on Wed Nov 16 17:48:04 2022
# print(len(content[3:]))
# print((content[3:]))
import pandas as pd
import math
class CreateTranscript():
    """Hold the interaction records read from an interaction output file.

    Attributes:
        list_of_interactions: record lines of the file, header lines removed.
        raw_interactions: independent copy of the same record lines.
    """

    def __init__(self, path="RIBlast output example.txt"):
        """Read the record lines of ``path``.

        Args:
            path: file to read; defaults to the example file name that was
                previously hard-coded, so ``CreateTranscript()`` still works.

        Raises:
            OSError: if the file cannot be opened.
        """
        with open(path, 'r') as file:
            # The first three lines are discarded
            # (presumably header lines - confirm against the file format).
            records = file.readlines()[3:]
        # readlines() consumes the handle, so calling it a second time would
        # return an empty list. Read once and give each attribute its own list.
        self.list_of_interactions = records
        self.raw_interactions = list(records)
def generate_interaction_list(self):
    """Split every record line into its comma-separated fields.

    Reads ``self.list_of_interactions``. Surrounding spaces and newlines are
    stripped from each line before it is split on ','.

    Blank lines are skipped. Every non-blank line is returned, including the
    last one, which the earlier ``range(0, len - 1)`` loop silently dropped.

    Returns:
        list[list[str]]: one list of field strings per record line.
    """
    return [
        line.strip(' \n').split(',')
        for line in self.list_of_interactions
        if line.strip()
    ]
# Script section: constructing CreateTranscript reads the interaction file,
# then the parsed list of field lists is printed to stdout.
transcriptlist = CreateTranscript()
print(transcriptlist.generate_interaction_list())
# Go from the interaction list to a transcript list? No - we will serve them an interaction list.
# print(content[3:][0].strip(' \n').split(',')[-1].strip('()').split(':'))
def generate_interaction_df(self):
    """Build a DataFrame of non-repeated interactions with per-row statistics.

    Steps:
      1. Split every non-blank line of ``self.raw_interactions`` into fields.
      2. Tag each row 'Repeat' or 'Not_repeat' (appended as the last field).
         A row is a repeat when its field 13 lies 1 to 14 below field 13 of
         the row directly before it: a primer sliding along a homopolymer
         stretch produces several shifted hits for what is a single binding
         site.
      3. Keep only the 'Not_repeat' rows and load them into a DataFrame.
      4. Add 'Number of interactions' (how often the row's column-3 value
         occurs among the kept rows) and 'Interaction Energy'
         (``exp(-E * kcalmol_joul / energy_constant)`` with E from column 5).

    Side effects:
        Sets ``self.interaction_list``, ``self.cleaned_interaction_list`` and
        ``self.df``; prints the energy column and the whole frame.

    Returns:
        pandas.DataFrame: the frame also stored in ``self.df``. For empty
        input it has zero rows and only the two derived columns.

    Raises:
        IndexError / ValueError: if a record has fewer than 14 fields or a
        non-numeric field 13 or column 5.
    """
    # '(' and ')' are removed; '-' and ':' become field separators.
    # Because every '-' is turned into ',', a negative number such as "-1.5"
    # splits into an empty field followed by "1.5".
    # NOTE(review): the hard-coded positions 3, 5 and 13 presumably account
    # for those extra empty fields - confirm against the file format.
    separators = str.maketrans({'(': None, ')': None, '-': ',', ':': ','})
    self.interaction_list = [
        line.strip(' \n').translate(separators).split(',')
        for line in self.raw_interactions
        if line.strip()  # skip blank lines; the last record is no longer dropped
    ]

    # Compare each row with the row directly before it. The first row has no
    # predecessor and is therefore never a repeat (indexing with i-1 at i == 0
    # would wrap around and compare it with the last row).
    repeat_window = 15
    previous_base = None
    for row in self.interaction_list:
        current_base = int(row[13])
        is_repeat = (
            previous_base is not None
            and previous_base - repeat_window < current_base < previous_base
        )
        row.append('Repeat' if is_repeat else 'Not_repeat')
        previous_base = current_base

    # Drop every shifted duplicate of a binding site.
    self.cleaned_interaction_list = [
        row for row in self.interaction_list if row[-1] == 'Not_repeat'
    ]

    self.df = pd.DataFrame(self.cleaned_interaction_list)

    # Boltzmann constant times 298 (presumably kelvin), and
    # 6.9477e-21 = 4184 / 6.022e23 (presumably kcal/mol -> joule per molecule).
    energy_constant = 1.380649*10**(-23)*298
    kcalmol_joul = 6.9477*10**-21

    if self.df.empty:
        # No rows means no positional columns; still expose the derived ones.
        self.df['Number of interactions'] = pd.Series(dtype='int64')
        self.df['Interaction Energy'] = pd.Series(dtype='float64')
    else:
        # Whole-column assignment instead of df[col][ind] = ... chained
        # assignment, which does not reliably write back into the frame.
        occurrences = self.df[3].value_counts()
        self.df['Number of interactions'] = self.df[3].map(occurrences)
        self.df['Interaction Energy'] = [
            math.exp(-float(energy)*kcalmol_joul/energy_constant)
            for energy in self.df[5]
        ]

    print(self.df['Interaction Energy'])
    print(self.df)
    return self.df
# Script section: constructing CreateTranscript reads the interaction file;
# generate_interaction_df() returns the DataFrame kept here as interaction_df.
transcripts = CreateTranscript()
interaction_df = transcripts.generate_interaction_df()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment