Skip to content
Snippets Groups Projects
Commit 9d8e9283 authored by Max Bär's avatar Max Bär
Browse files

Feature max

parent 172141ed
No related branches found
No related tags found
1 merge request!40Feature max
<primer1 <primer1
TTTTTTTTTTTTTTTT TTTTTTTTTTTTTTT
...@@ -6,6 +6,7 @@ Created on Mon Nov 14 14:49:50 2022 ...@@ -6,6 +6,7 @@ Created on Mon Nov 14 14:49:50 2022
""" """
import argparse import argparse
import logging import logging
import main
def create_parser(): def create_parser():
"""This function creates the parser""" """This function creates the parser"""
...@@ -30,5 +31,5 @@ if __name__ == '__main__': ...@@ -30,5 +31,5 @@ if __name__ == '__main__':
level=logging.INFO, level=logging.INFO,
) )
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
letsgo() main()
#here we would point to the main module and parse the energy cutoff #here we would point to the main module and parse the energy cutoff
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 16 17:48:04 2022
@author: baerma
"""
# with open("RIBlast output example.txt", 'r') as file:
# content = file.readlines()
# print(content[3:][0].strip(' \n').split(',')[-1].strip('()').split(':'))
# #create a instant of each transcript class
# print(len(content[3:]))
# print((content[3:]))
import pandas as pd
import math
class CreateTranscript():
    """Turn raw RIBlast interaction lines into a de-duplicated DataFrame.

    On construction the file ``RIBlast output example.txt`` is opened relative
    to the current working directory; its first three lines are discarded and
    the remaining lines are stored in ``self.raw_interactions``.
    """

    def __init__(self):
        with open("RIBlast output example.txt", 'r') as file:
            self.raw_interactions = file.readlines()[3:]

    def generate_interaction_df(self):
        """Build the interaction table from ``self.raw_interactions``.

        Each non-blank line is split into fields, consecutive rows whose
        position (field 13) lies 1 to 14 below the previous row's position
        are tagged ``'Repeat'`` and dropped, and two columns are added:

        * ``'Number of interactions'`` - how many retained rows share the
          same value in column 3.
        * ``'Interaction Energy'`` -
          ``exp(-float(column 5) * kcalmol_joul / energy_constant)``.

        Side effects: sets ``self.interaction_list``,
        ``self.cleaned_interaction_list`` and ``self.df`` and prints the
        energy column and the whole frame.

        Returns:
            pandas.DataFrame: the retained interactions (also ``self.df``).

        Raises:
            IndexError: if a non-blank line has fewer than 14 fields.
            ValueError: if field 13 or field 5 is not numeric.
        """
        # Clean up the raw lines so that every interaction is a neat list of
        # fields.  Brackets are removed and '-' / ':' become field separators,
        # which splits the "(a-b:c-d)" base-pair block into four fields (and
        # turns the minus sign of a negative number into an empty field).
        # Every non-blank line is parsed; blank lines (e.g. a trailing empty
        # line) are skipped instead of unconditionally dropping the last line.
        self.interaction_list = [
            line.strip(' \n')
            .replace('(', '')
            .replace(')', '')
            .replace('-', ',')
            .replace(':', ',')
            .split(',')
            for line in self.raw_interactions
            if line.strip()
        ]

        # Identify interactions that are the previous one shifted by a few
        # bases (a 15 T primer on a 20 A stretch yields several hits although
        # it is a single binding site).  The first row has no predecessor and
        # is therefore never a repeat.
        # NOTE(review): only positions are compared, not the transcript name;
        # confirm that adjacent rows of different transcripts cannot collide.
        previous_base = None
        for interaction in self.interaction_list:
            current_base = int(interaction[13])
            is_repeat = (
                previous_base is not None
                and current_base in range(previous_base - 1,
                                          previous_base - 15, -1)
            )
            interaction.append('Repeat' if is_repeat else 'Not_repeat')
            previous_base = current_base

        # Exclude all interactions which are a repeat of the same binding site.
        self.cleaned_interaction_list = [
            item for item in self.interaction_list if item[-1] == 'Not_repeat'
        ]

        self.df = pd.DataFrame(self.cleaned_interaction_list)

        # presumably k_B * 298 K and the joule value of 1 kcal/mol per
        # molecule - TODO confirm units against the RIBlast documentation.
        energy_constant = 1.380649*10**(-23)*298
        kcalmol_joul = 6.9477*10**-21

        if self.df.empty:
            # No data rows: columns 3 and 5 do not exist, so only create the
            # (empty) result columns.
            self.df['Number of interactions'] = pd.Series(dtype='int64')
            self.df['Interaction Energy'] = pd.Series(dtype='float64')
        else:
            # Whole-column assignment instead of chained indexing
            # (df[col][row] = ...), which pandas does not guarantee to write
            # through; the counts are also computed only once.
            transcript_counts = self.df[3].value_counts()
            self.df['Number of interactions'] = self.df[3].map(
                transcript_counts)
            self.df['Interaction Energy'] = [
                math.exp(-float(energy)*kcalmol_joul/energy_constant)
                for energy in self.df[5]
            ]

        print(self.df['Interaction Energy'])
        print(self.df)
        return self.df
transcripts = CreateTranscript()
interaction_df = transcripts.generate_interaction_df()

# Render one tab-separated, GTF-style line per retained interaction, then
# print the result and write it to 'output_transcripts_df.txt'.
gtf_lines = []
for row in interaction_df.index:
    fields = [
        interaction_df[3][row],
        'RIBlast',
        'Priming_site',
        interaction_df[13][row],
        interaction_df[12][row],
        '.',
        '+',
        '.',
        f'Accessibility_Energy "{interaction_df["Interaction Energy"][row]}"',
    ]
    gtf_lines.append('\t'.join(fields) + '\n')
output = ''.join(gtf_lines)

print(output)
with open('output_transcripts_df.txt', 'w') as out_file:
    out_file.write(output)
import sys import sys
#from .classmodule import MyClass from createprimer import CreatePrimer
#from .funcmodule import my_function from postprocessing import PostProcessRIBlast
def main(): def main():
print('in main') generate_RIBlast_input()
args = sys.argv[1:] create_gtf()
print('count of args :: {}'.format(len(args)))
for arg in args:
print('passed argument :: {}'.format(arg))
#my_function('Hello World') def generate_RIBlast_input():
#my_object = MyClass('Robin') """This function creates a list of the filenames for the RIBlast"""
#my_object.say_name() my_primer = CreatePrimer()
my_primer.create_fasta()
primer_filename = my_primer.name +".fasta"
transcripts_filename = "transcripts.fasta"
return [primer_filename, transcripts_filename]
def create_gtf():
gtf_file = PostProcessRIBlast().output
print(gtf_file)
main()
if __name__ == '__main__': if __name__ == '__main__':
main() main()
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 20 14:06:20 2022
@author: baerma
"""
import pandas as pd
import math
# Disable pandas' chained-assignment (SettingWithCopy) warning for the whole
# process; this is a global pandas option, not limited to this module.
pd.options.mode.chained_assignment = None
class PostProcessRIBlast():
    """Convert a RIBlast output file into GTF-style text.

    Constructing an instance runs the whole pipeline (read file -> DataFrame
    -> GTF text); the resulting text is available as ``self.output``.

    Args:
        file: path of the RIBlast output file to read.  Defaults to
            ``"RIBlast output example.txt"`` in the current working directory.
        firstline: number of leading lines to skip before the interaction
            rows start.  Defaults to 3.
    """

    # Class-level defaults so the parsing methods also work on an instance
    # whose ``__init__`` was bypassed.
    file = "RIBlast output example.txt"
    firstline = 3

    def __init__(self, file="RIBlast output example.txt", firstline=3):
        self.file = file
        self.firstline = firstline
        self.output = self.generate_gtf()

    def calculate_energy(self, value):
        """Return ``exp(-float(value) * kcalmol_joul / energy_constant)``.

        Args:
            value: an energy as a number or numeric string.
        """
        # presumably k_B * 298 K and the joule value of 1 kcal/mol per
        # molecule - TODO confirm units against the RIBlast documentation.
        energy_constant = 1.380649*10**(-23)*298
        kcalmol_joul = 6.9477*10**-21
        return (math.exp(-float(value)*kcalmol_joul/energy_constant))

    def create_list_from_output(self):
        """Read ``self.file`` and split every interaction row into fields.

        Brackets are removed and ``-`` / ``:`` are treated as additional
        field separators.  Blank lines are skipped; every other line after
        the first ``self.firstline`` lines is parsed, including the last one.

        Returns:
            list[list[str]]: one list of string fields per interaction
            (also stored in ``self.interaction_list``).
        """
        with open(self.file, 'r') as file:
            self.raw_interactions = file.readlines()[self.firstline:]
        self.number_entries = len(self.raw_interactions)
        self.interaction_list = [
            line.strip(' \n')
            .replace('(', '')
            .replace(')', '')
            .replace('-', ',')
            .replace(':', ',')
            .split(',')
            for line in self.raw_interactions
            if line.strip()
        ]
        return self.interaction_list

    def create_pandas_df(self):
        """Build the interaction DataFrame with count and energy columns.

        Adds ``'Number_of_interactions'`` (rows sharing the same value in
        column 3), ``'Interaction_Energy'`` (``calculate_energy`` applied to
        column 5) and ``'Normalised_interaction_energy'`` (their quotient).

        Returns:
            pandas.DataFrame: the table, also stored in ``self.df``.
        """
        self.interaction_list = self.create_list_from_output()
        self.df = pd.DataFrame(self.interaction_list)
        self.transcript = 3
        self.energy = 5
        if self.df.empty:
            # No data rows: the positional columns do not exist, so only
            # create the (empty) result columns.
            self.df['Number_of_interactions'] = pd.Series(dtype='int64')
            self.df['Interaction_Energy'] = pd.Series(dtype='float64')
        else:
            # Whole-column assignment instead of chained indexing
            # (df[col][row] = ...), which pandas does not guarantee to write
            # through; the counts are also computed only once.
            transcript_counts = self.df[self.transcript].value_counts()
            self.df['Number_of_interactions'] = (
                self.df[self.transcript].map(transcript_counts))
            self.df['Interaction_Energy'] = [
                self.calculate_energy(value)
                for value in self.df[self.energy]
            ]
        self.df['Normalised_interaction_energy'] = (
            self.df['Interaction_Energy']/self.df['Number_of_interactions'])
        return self.df

    def generate_gtf(self):
        """Render every interaction as one tab-separated GTF-style line.

        Line layout: column 3, ``RIBlast``, ``Priming_site``, column 13,
        column 12, ``.``, ``+``, ``.`` and an ``Interaction_Energy "<value>"``
        attribute holding the normalised energy.

        Returns:
            str: the concatenated lines (also stored in ``self.output``);
            an empty string when there are no interactions.
        """
        self.interaction_df = self.create_pandas_df()
        normalised = self.interaction_df["Normalised_interaction_energy"]
        gtf_lines = []
        for index in self.interaction_df.index:
            fields = [
                self.interaction_df[3][index],
                'RIBlast',
                'Priming_site',
                self.interaction_df[13][index],
                self.interaction_df[12][index],
                '.',
                '+',
                '.',
                f'Interaction_Energy "{normalised[index]}"',
            ]
            gtf_lines.append('\t'.join(fields) + '\n')
        self.output = ''.join(gtf_lines)
        return self.output
#print(PostProcessRIBlast().output)
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 16 16:47:20 2022
@author: baerma
"""
from createtranscript import CreateTranscript
# Runs at import time: construct the transcript parser and build the
# interaction DataFrame from it.
transcripts = CreateTranscript()
interaction_df = transcripts.generate_interaction_df()
#os.chdir('C:/Users/baerma/Desktop/PhD-Local/Lectures/Programming for Life Sciences/priming-site-predictor/primingsitepredictor')
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 16 14:17:06 2022
@author: baerma
"""
from createprimer import CreatePrimer
def generate_RIBlast_input():
    """Prepare the primer FASTA and return the RIBlast input filenames.

    A ``CreatePrimer`` object is created and its ``create_fasta()`` method is
    called (presumably writing ``<name>.fasta`` - verify in ``createprimer``).

    Returns:
        list[str]: ``[primer_filename, transcripts_filename]`` where the
        primer filename is the primer's ``name`` plus ``".fasta"`` and the
        transcripts filename is always ``"transcripts.fasta"``.
    """
    primer = CreatePrimer()
    primer.create_fasta()
    return [primer.name + ".fasta", "transcripts.fasta"]
# Runs at import time: generate the RIBlast inputs and print the returned
# list of filenames.
print(generate_RIBlast_input())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment