Skip to content
Snippets Groups Projects
Commit 7fbefd70 authored by Studer Gabriel's avatar Studer Gabriel
Browse files

add scripts to generate S1 Fig. and S1 Table.

parent 2cea55a3
No related branches found
No related tags found
No related merge requests found
......@@ -19,6 +19,9 @@ for t in targets:
os.path.exists(prof_path):
cmd = ['pm', 'build-model', '-p', tpl_path, '-f', aln_path,
'-s', prof_path, '-o', out_path]
subprocess.run(cmd)
p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print(p.stdout.decode())
print(p.stderr.decode())
print("full modelling time: ", time.time() - start_time)
Scripts and data to reproduce various plots / table from the Supporting
information
1. S1 Fig: Length of modelled loops
2. S1 Table: Structural coverage in default StructureDB
S1 Fig: Length of modelled loops
--------------------------------
- fetch the stdout of running build_promod_models.py in the modelling
benchmark by executing
`pm build_promod_models.py > promod3_modelling_out.txt`
- copy promod3_modelling_out.txt in the same directory as
cameo_benchmark_loop_lengths.py
- execute `pm cameo_benchmark_loop_length.py` which creates
cameo_benchmark_loop_lengths.png and spits out some numbers
S1 Table: Structural coverage in default StructureDB
----------------------------------------------------
- execute `pm structural_coverage_in_StructureDB.py` to get the values that are
displayed in S1 Table. Be aware: the table reports the average values over 3
runs of this script.
other/cameo_benchmark_loop_lengths.png

18.8 KiB

import numpy as np
import matplotlib.pyplot as plt
with open('promod3_modelling_out.txt', 'r') as fh:
modelling_out = fh.readlines()
n_deletions_relaxed = 0
initial_loop_lengths = list()
resolved_loop_lengths = list()
for line in modelling_out:
if line.startswith('Resolved'):
loop_string_initial = line.split()[1]
loop_string_resolved = line.split()[4]
loop_seq_initial = loop_string_initial.split('-')[1].strip('()')
loop_seq_resolved = loop_string_resolved.split('-')[1].strip('()')
initial_loop_lengths.append(len(loop_seq_initial))
resolved_loop_lengths.append(len(loop_seq_resolved))
if line.startswith('Closed') and 'relaxing' in line:
n_deletions_relaxed += 1
initial_histogram = [0] * 26
resolved_histogram = [0] * 26
initial_n_above = 0
resolved_n_above = 0
for l in initial_loop_lengths:
if l > 25:
initial_n_above += 1
else:
initial_histogram[l] += 1
for l in resolved_loop_lengths:
if l > 25:
resolved_n_above += 1
else:
resolved_histogram[l] += 1
n_resolved_12 = sum(resolved_histogram[:13])
n_resolved_total = sum(resolved_histogram) + resolved_n_above
print('total:', n_resolved_total)
print('resolved fraction <= 12:', float(n_resolved_12)/(n_resolved_total))
print('initial above 25:', initial_n_above)
print('resolved above 25:', resolved_n_above)
print('relaxed deletions that didnt enter loop modelling', n_deletions_relaxed)
x_initial = np.linspace(0, 25, 26) - 0.2
x_resolved = x_initial + 0.4
cred = (128.0/255,0.0,0.0)
cblue = (102.0/255,153.0/255,204.0/255)
# do the barplots representing the length histograms
plt.bar(x_initial, initial_histogram, width=0.4, color=cred, align='center',
linewidth=1.5, label='initial length', edgecolor='k')
plt.bar(x_resolved, resolved_histogram, width=0.4, color=cblue, align='center',
linewidth=1.5, label='resolved length', edgecolor='k')
# do the vertical line representing the cutoff for database approach /
# Monte Carlo fallback
plt.axvline(12.5, linewidth = 2.0, color='k', linestyle='dashed')
plt.legend(frameon=False, fontsize='x-large')
plt.xlim((-0.4, 25.4))
plt.xticks([0, 5, 10, 12, 15, 20, 25],['0', '5', '10', '12', '15', '20', '25'])
plt.tick_params(axis ='both', which ='both', length = 0)
plt.xlabel('Loop Length', fontsize='x-large')
plt.ylabel('N', fontsize='x-large')
plt.savefig('cameo_benchmark_loop_lengths.png')
This diff is collapsed.
from promod3 import loop
import random
def GetCoveredFraction(db, length, N, rmsd_thresh, coil_thresh):
# get all possible fragments
frag_infos = list()
for coord_idx in range(db.GetNumCoords()):
coord_info = db.GetCoordInfo(coord_idx)
for frag_start_idx in range(coord_info.size - length):
frag_info = loop.FragmentInfo(coord_idx, frag_start_idx, length)
frag_infos.append((frag_info, coord_info.id))
# randomize
random.shuffle(frag_infos)
N_processed = 0
N_covered = 0
dummy_seq = 'A' * length
for frag_idx in range(len(frag_infos)):
if N_processed >= N:
break # its enough!
frag_info = frag_infos[frag_idx][0]
pdb_id = frag_infos[frag_idx][1]
# check whether coil threshold is fulfilled
ss = db.GetDSSPStates(frag_info)
n_coil = ss.count('C')
if float(n_coil) / length < coil_thresh:
continue
# search for similar fragment in other entry
frag = db.GetBackboneList(frag_info, dummy_seq)
is_covered = False
for coord_idx in range(db.GetNumCoords()):
if is_covered:
break
coord_info = db.GetCoordInfo(coord_idx)
if pdb_id == coord_info.id:
# same entry with respect to pdb id
continue
for frag_start_idx in range(coord_info.size - length):
current_frag_info = loop.FragmentInfo(coord_idx, frag_start_idx,
length)
current_frag = db.GetBackboneList(current_frag_info, dummy_seq)
rmsd = frag.CARMSD(current_frag, superposed_rmsd=True)
if rmsd < rmsd_thresh:
is_covered = True
break
# add up results
N_processed += 1
if is_covered:
N_covered += 1
return float(N_covered) / N_processed
db = loop.LoadStructureDB()
print('No Thresh:')
for i in range(3,16):
print('Fraction Covered', i, GetCoveredFraction(db, i, 1000, 1.0, 0.0))
print('50 percent coils:')
for i in range(3,16):
print('Fraction Covered', i, GetCoveredFraction(db, i, 1000, 1.0, 0.5))
,schdaude,schdaudoputer,29.06.2020 09:45,file:///home/schdaude/.config/libreoffice/4;
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment