Skip to content
Snippets Groups Projects
Commit a80d5f0b authored by Max Bär's avatar Max Bär
Browse files

Merge branch 'feature_max' into 'main'

First attempt on postprocess

See merge request !24
parents 38880ee0 4edd965f
No related branches found
No related tags found
1 merge request: !24 First attempt on postprocess
......@@ -12,27 +12,57 @@ Created on Wed Nov 16 17:48:04 2022
# print(len(content[3:]))
# print((content[3:]))
import pandas as pd
import math
class CreateTranscript():
    """Load the interaction lines of a RIBlast output file.

    The first three lines of the file are skipped (presumably header
    lines -- confirm against the RIBlast output format) and the remaining
    lines are kept, unparsed, for the generate_* methods.
    """

    def __init__(self, path="RIBlast output example.txt"):
        """Read the file at *path* and store its lines after the third.

        Args:
            path: File to read. Defaults to the example file name that was
                previously hard-coded.

        Raises:
            OSError: If the file cannot be opened.
        """
        with open(path, 'r') as file:
            # Read the file once: a second readlines() call on the same
            # handle returns an empty list because the first call already
            # consumed the whole file.
            lines = file.readlines()[3:]
        self.list_of_interactions = lines
        # Independent copy so that mutating one attribute never affects
        # the other.
        self.raw_interactions = list(lines)

    def generate_interaction_list(self):
        """Return the stored lines split into lists of comma-separated fields.

        Trailing spaces and newlines are stripped from each line before
        splitting.

        NOTE(review): the last stored line is skipped, exactly as the
        original loop bound ``len(...) - 1`` did -- presumably a trailing
        non-data line; confirm this is intended.
        """
        return [
            line.strip(' \n').split(',')
            for line in self.list_of_interactions[:-1]
        ]
# Build a CreateTranscript from its default input file and print the rows
# returned by generate_interaction_list().
transcriptlist = CreateTranscript()
print(transcriptlist.generate_interaction_list())
# Go from interaction list to transcript list? No - we will serve them an interaction list.
# print(content[3:][0].strip(' \n').split(',')[-1].strip('()').split(':'))
def generate_interaction_df(self):
    """Build a DataFrame of non-repeated interactions from raw_interactions.

    Steps:
      1. Split every raw line (except the last one) into a flat list of
         fields: brackets are removed and '-' and ':' are treated as
         additional separators next to ','.
      2. Mark an interaction as 'Repeat' when field 13 is 1 to 14 smaller
         than field 13 of the interaction directly before it (the same
         binding site shifted by a few bases, e.g. a 15 T primer on a run
         of 20 A); everything else is 'Not_repeat'.
      3. Keep only the 'Not_repeat' interactions and put them in a
         DataFrame with positional integer column labels.
      4. Add 'Number of interactions' (how many kept rows share the row's
         value in column 3, i.e. per transcript) and 'Interaction Energy'
         (exp(-E * kcalmol_joul / energy_constant), E taken from column 5).

    Side effects: sets self.interaction_list, self.cleaned_interaction_list
    and self.df, and prints the energy column and the whole DataFrame.

    Returns:
        The resulting pandas DataFrame (also stored as self.df). It has
        zero rows when no interaction survives.

    NOTE(review): replacing '-' with ',' also splits every negative number
    into an empty field followed by its magnitude, so the field positions
    used here (3, 5, 13) depend on how many negative values and hyphens a
    line contains -- verify against real RIBlast output.
    NOTE(review): the repeat check does not test whether two consecutive
    interactions belong to the same transcript -- confirm this is intended.
    """
    # Boltzmann constant (J/K) times 298 -- presumably the temperature in K.
    energy_constant = 1.380649*10**(-23)*298
    # Presumably the kcal/mol -> joule (per molecule) conversion factor.
    kcalmol_joul = 6.9477*10**-21

    # Clean up the raw lines so that we have a neat list of fields.
    # The last raw line is skipped, as the original loop bound did.
    self.interaction_list = [
        line.strip(' \n')
        .replace('(', '')
        .replace(')', '')
        .replace('-', ',')
        .replace(':', ',')
        .split(',')
        for line in self.raw_interactions[:-1]
    ]

    # Flag interactions that are the previous one shifted by 1..14 bases.
    # The first interaction has no predecessor and is never a repeat
    # (index -1 would otherwise compare it with the *last* interaction).
    previous_base = None
    for interaction in self.interaction_list:
        current_base = int(interaction[13])
        is_repeat = (
            previous_base is not None
            and 1 <= previous_base - current_base <= 14
        )
        interaction.append('Repeat' if is_repeat else 'Not_repeat')
        previous_base = current_base

    # Exclude all interactions which are a repeat of the same binding site.
    self.cleaned_interaction_list = [
        item for item in self.interaction_list if item[-1] == 'Not_repeat'
    ]

    self.df = pd.DataFrame(self.cleaned_interaction_list)
    if self.df.empty:
        # No rows means no positional columns either; still provide the
        # two derived columns so callers get a consistent schema.
        self.df['Number of interactions'] = pd.Series(dtype='int64')
        self.df['Interaction Energy'] = pd.Series(dtype='float64')
    else:
        # Whole-column assignment instead of chained item assignment,
        # which pandas does not guarantee to write back to the frame.
        self.df['Number of interactions'] = self.df[3].map(
            self.df[3].value_counts()
        )
        self.df['Interaction Energy'] = self.df[5].map(
            lambda energy: math.exp(
                -float(energy)*kcalmol_joul/energy_constant
            )
        )
    print(self.df['Interaction Energy'])
    print(self.df)
    return self.df
# Build a CreateTranscript from its default input file and keep the
# DataFrame returned by generate_interaction_df() for further use.
transcripts = CreateTranscript()
interaction_df = transcripts.generate_interaction_df()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment