add scripts to generate S1 Fig. and S1 Table.

7fbefd70 · Studer Gabriel · 2cea55a3 · 7fbefd70 · 7fbefd70 · 7fbefd70
Commit 7fbefd70 authored 4 years ago by Studer Gabriel
--- a/modelling/build_promod_models.py
+++ b/modelling/build_promod_models.py
@@ -19,6 +19,9 @@ for t in targets:
       os.path.exists(prof_path):
        cmd = ['pm', 'build-model', '-p', tpl_path, '-f', aln_path, 
               '-s', prof_path, '-o', out_path]
-        subprocess.run(cmd)
+        p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        print(p.stdout.decode())
+        print(p.stderr.decode())

 print("full modelling time: ", time.time() - start_time)
+
--- a/other/README.md
+++ b/other/README.md
+Scripts and data to reproduce various plots / table from the Supporting 
+information
+
+1. S1 Fig: Length of modelled loops
+2. S1 Table: Structural coverage in default StructureDB
+
+
+S1 Fig: Length of modelled loops
+--------------------------------
+
+- fetch the stdout of running  build_promod_models.py in the modelling
+  benchmark by executing 
+  `pm build_promod_models.py > promod3_modelling_out.txt`
+- copy promod3_modelling_out.txt in the same directory as 
+  cameo_benchmark_loop_lengths.py
+- execute `pm cameo_benchmark_loop_length.py` which creates 
+  cameo_benchmark_loop_lengths.png and spits out some numbers
+
+S1 Table: Structural coverage in default StructureDB
+----------------------------------------------------
+
+- execute `pm structural_coverage_in_StructureDB.py` to get the values that are 
+  displayed in S1 Table. Be aware: the table reports the average values over 3 
+  runs of this script. 
+
--- a/other/cameo_benchmark_loop_lengths.png
+++ b/other/cameo_benchmark_loop_lengths.png
--- a/other/cameo_benchmark_loop_lengths.py
+++ b/other/cameo_benchmark_loop_lengths.py
+import numpy as np
+import matplotlib.pyplot as plt
+
+with open('promod3_modelling_out.txt', 'r') as fh:
+    modelling_out = fh.readlines()
+
+n_deletions_relaxed = 0
+initial_loop_lengths = list()
+resolved_loop_lengths = list()
+
+for line in modelling_out:
+    if line.startswith('Resolved'):
+        loop_string_initial = line.split()[1]
+        loop_string_resolved = line.split()[4]
+        loop_seq_initial = loop_string_initial.split('-')[1].strip('()')
+        loop_seq_resolved = loop_string_resolved.split('-')[1].strip('()')
+        initial_loop_lengths.append(len(loop_seq_initial))
+        resolved_loop_lengths.append(len(loop_seq_resolved))
+    if line.startswith('Closed') and 'relaxing' in line:
+        n_deletions_relaxed += 1
+
+initial_histogram = [0] * 26
+resolved_histogram = [0] * 26
+initial_n_above = 0
+resolved_n_above = 0
+
+for l in initial_loop_lengths:
+    if l > 25:
+        initial_n_above += 1
+    else:
+        initial_histogram[l] += 1
+
+
+for l in resolved_loop_lengths:
+    if l > 25:
+        resolved_n_above += 1
+    else:
+        resolved_histogram[l] += 1
+
+n_resolved_12 = sum(resolved_histogram[:13])
+n_resolved_total = sum(resolved_histogram) + resolved_n_above
+
+print('total:', n_resolved_total)
+print('resolved fraction <= 12:', float(n_resolved_12)/(n_resolved_total))
+print('initial above 25:', initial_n_above)
+print('resolved above 25:', resolved_n_above)
+print('relaxed deletions that didnt enter loop modelling', n_deletions_relaxed)
+
+x_initial = np.linspace(0, 25, 26) - 0.2
+x_resolved = x_initial + 0.4
+
+cred = (128.0/255,0.0,0.0)
+cblue = (102.0/255,153.0/255,204.0/255)
+
+# do the barplots representing the length histograms
+plt.bar(x_initial, initial_histogram, width=0.4, color=cred, align='center', 
+        linewidth=1.5, label='initial length', edgecolor='k')
+plt.bar(x_resolved, resolved_histogram, width=0.4, color=cblue, align='center', 
+        linewidth=1.5, label='resolved length', edgecolor='k')
+
+# do the vertical line representing the cutoff for database approach / 
+# Monte Carlo fallback
+plt.axvline(12.5, linewidth = 2.0, color='k', linestyle='dashed')
+
+plt.legend(frameon=False, fontsize='x-large')
+plt.xlim((-0.4, 25.4))
+plt.xticks([0, 5, 10, 12, 15, 20, 25],['0', '5', '10', '12', '15', '20', '25'])
+plt.tick_params(axis ='both', which ='both', length = 0)
+plt.xlabel('Loop Length', fontsize='x-large')
+plt.ylabel('N', fontsize='x-large')
+
+plt.savefig('cameo_benchmark_loop_lengths.png')
+
--- a/other/promod3_modelling_out.txt
+++ b/other/promod3_modelling_out.txt
--- a/other/structural_coverage_in_StructureDB.py
+++ b/other/structural_coverage_in_StructureDB.py
+from promod3 import loop
+import random
+
+def GetCoveredFraction(db, length, N, rmsd_thresh, coil_thresh):
+
+    # get all possible fragments
+    frag_infos = list()
+    for coord_idx in range(db.GetNumCoords()):
+        coord_info = db.GetCoordInfo(coord_idx)
+        for frag_start_idx in range(coord_info.size - length):
+            frag_info = loop.FragmentInfo(coord_idx, frag_start_idx, length)
+            frag_infos.append((frag_info, coord_info.id))
+  
+    # randomize
+    random.shuffle(frag_infos)
+
+    N_processed = 0
+    N_covered = 0 
+    dummy_seq = 'A' * length 
+
+    for frag_idx in range(len(frag_infos)):
+        if N_processed >= N:
+            break # its enough!
+
+        frag_info = frag_infos[frag_idx][0]
+        pdb_id = frag_infos[frag_idx][1]
+ 
+        # check whether coil threshold is fulfilled
+        ss = db.GetDSSPStates(frag_info)
+        n_coil = ss.count('C')
+        if float(n_coil) / length < coil_thresh:
+            continue
+
+        # search for similar fragment in other entry
+        frag = db.GetBackboneList(frag_info, dummy_seq)
+        is_covered = False
+        for coord_idx in range(db.GetNumCoords()):
+            if is_covered:
+                break
+            coord_info = db.GetCoordInfo(coord_idx)
+            if pdb_id == coord_info.id:
+                # same entry with respect to pdb id
+                continue
+            for frag_start_idx in range(coord_info.size - length):
+                current_frag_info = loop.FragmentInfo(coord_idx, frag_start_idx, 
+                                                      length)
+                current_frag = db.GetBackboneList(current_frag_info, dummy_seq)
+                rmsd = frag.CARMSD(current_frag, superposed_rmsd=True)
+                if rmsd < rmsd_thresh:
+                    is_covered = True
+                    break
+        # add up results
+        N_processed += 1
+        if is_covered:
+            N_covered += 1
+    return float(N_covered) / N_processed
+
+db = loop.LoadStructureDB()
+
+print('No Thresh:')
+for i in range(3,16):
+    print('Fraction Covered', i, GetCoveredFraction(db, i, 1000, 1.0, 0.0))
+
+print('50 percent coils:')
+for i in range(3,16):
+    print('Fraction Covered', i, GetCoveredFraction(db, i, 1000, 1.0, 0.5))
+
--- a/sidechain/.~lock.eval_promod_frm_no_subrotamer_optimization.csv#
+++ b/sidechain/.~lock.eval_promod_frm_no_subrotamer_optimization.csv#
-,schdaude,schdaudoputer,29.06.2020 09:45,file:///home/schdaude/.config/libreoffice/4;
\ No newline at end of file