Commit 7fbefd70 authored by Studer Gabriel's avatar Studer Gabriel
Browse files

add scripts to generate S1 Fig. and S1 Table.

parent 2cea55a3
......@@ -19,6 +19,9 @@ for t in targets:
os.path.exists(prof_path):
cmd = ['pm', 'build-model', '-p', tpl_path, '-f', aln_path,
'-s', prof_path, '-o', out_path]
subprocess.run(cmd)
p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print(p.stdout.decode())
print(p.stderr.decode())
print("full modelling time: ", time.time() - start_time)
Scripts and data to reproduce various plots / table from the Supporting
information
1. S1 Fig: Length of modelled loops
2. S1 Table: Structural coverage in default StructureDB
S1 Fig: Length of modelled loops
--------------------------------
- fetch the stdout of running build_promod_models.py in the modelling
benchmark by executing
`pm build_promod_models.py > promod3_modelling_out.txt`
- copy promod3_modelling_out.txt in the same directory as
cameo_benchmark_loop_lengths.py
- execute `pm cameo_benchmark_loop_length.py` which creates
cameo_benchmark_loop_lengths.png and spits out some numbers
S1 Table: Structural coverage in default StructureDB
----------------------------------------------------
- execute `pm structural_coverage_in_StructureDB.py` to get the values that are
displayed in S1 Table. Be aware: the table reports the average values over 3
runs of this script.
import numpy as np
import matplotlib.pyplot as plt
with open('promod3_modelling_out.txt', 'r') as fh:
modelling_out = fh.readlines()
n_deletions_relaxed = 0
initial_loop_lengths = list()
resolved_loop_lengths = list()
for line in modelling_out:
if line.startswith('Resolved'):
loop_string_initial = line.split()[1]
loop_string_resolved = line.split()[4]
loop_seq_initial = loop_string_initial.split('-')[1].strip('()')
loop_seq_resolved = loop_string_resolved.split('-')[1].strip('()')
initial_loop_lengths.append(len(loop_seq_initial))
resolved_loop_lengths.append(len(loop_seq_resolved))
if line.startswith('Closed') and 'relaxing' in line:
n_deletions_relaxed += 1
initial_histogram = [0] * 26
resolved_histogram = [0] * 26
initial_n_above = 0
resolved_n_above = 0
for l in initial_loop_lengths:
if l > 25:
initial_n_above += 1
else:
initial_histogram[l] += 1
for l in resolved_loop_lengths:
if l > 25:
resolved_n_above += 1
else:
resolved_histogram[l] += 1
n_resolved_12 = sum(resolved_histogram[:13])
n_resolved_total = sum(resolved_histogram) + resolved_n_above
print('total:', n_resolved_total)
print('resolved fraction <= 12:', float(n_resolved_12)/(n_resolved_total))
print('initial above 25:', initial_n_above)
print('resolved above 25:', resolved_n_above)
print('relaxed deletions that didnt enter loop modelling', n_deletions_relaxed)
x_initial = np.linspace(0, 25, 26) - 0.2
x_resolved = x_initial + 0.4
cred = (128.0/255,0.0,0.0)
cblue = (102.0/255,153.0/255,204.0/255)
# do the barplots representing the length histograms
plt.bar(x_initial, initial_histogram, width=0.4, color=cred, align='center',
linewidth=1.5, label='initial length', edgecolor='k')
plt.bar(x_resolved, resolved_histogram, width=0.4, color=cblue, align='center',
linewidth=1.5, label='resolved length', edgecolor='k')
# do the vertical line representing the cutoff for database approach /
# Monte Carlo fallback
plt.axvline(12.5, linewidth = 2.0, color='k', linestyle='dashed')
plt.legend(frameon=False, fontsize='x-large')
plt.xlim((-0.4, 25.4))
plt.xticks([0, 5, 10, 12, 15, 20, 25],['0', '5', '10', '12', '15', '20', '25'])
plt.tick_params(axis ='both', which ='both', length = 0)
plt.xlabel('Loop Length', fontsize='x-large')
plt.ylabel('N', fontsize='x-large')
plt.savefig('cameo_benchmark_loop_lengths.png')
This diff is collapsed.
from promod3 import loop
import random
def GetCoveredFraction(db, length, N, rmsd_thresh, coil_thresh):
# get all possible fragments
frag_infos = list()
for coord_idx in range(db.GetNumCoords()):
coord_info = db.GetCoordInfo(coord_idx)
for frag_start_idx in range(coord_info.size - length):
frag_info = loop.FragmentInfo(coord_idx, frag_start_idx, length)
frag_infos.append((frag_info, coord_info.id))
# randomize
random.shuffle(frag_infos)
N_processed = 0
N_covered = 0
dummy_seq = 'A' * length
for frag_idx in range(len(frag_infos)):
if N_processed >= N:
break # its enough!
frag_info = frag_infos[frag_idx][0]
pdb_id = frag_infos[frag_idx][1]
# check whether coil threshold is fulfilled
ss = db.GetDSSPStates(frag_info)
n_coil = ss.count('C')
if float(n_coil) / length < coil_thresh:
continue
# search for similar fragment in other entry
frag = db.GetBackboneList(frag_info, dummy_seq)
is_covered = False
for coord_idx in range(db.GetNumCoords()):
if is_covered:
break
coord_info = db.GetCoordInfo(coord_idx)
if pdb_id == coord_info.id:
# same entry with respect to pdb id
continue
for frag_start_idx in range(coord_info.size - length):
current_frag_info = loop.FragmentInfo(coord_idx, frag_start_idx,
length)
current_frag = db.GetBackboneList(current_frag_info, dummy_seq)
rmsd = frag.CARMSD(current_frag, superposed_rmsd=True)
if rmsd < rmsd_thresh:
is_covered = True
break
# add up results
N_processed += 1
if is_covered:
N_covered += 1
return float(N_covered) / N_processed
db = loop.LoadStructureDB()
print('No Thresh:')
for i in range(3,16):
print('Fraction Covered', i, GetCoveredFraction(db, i, 1000, 1.0, 0.0))
print('50 percent coils:')
for i in range(3,16):
print('Fraction Covered', i, GetCoveredFraction(db, i, 1000, 1.0, 0.5))
,schdaude,schdaudoputer,29.06.2020 09:45,file:///home/schdaude/.config/libreoffice/4;
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment