Commit b0a19b07 authored by Studer Gabriel's avatar Studer Gabriel
Browse files

add clash analysis in sidechain benchmark

parent 8a46a93e
......@@ -3,7 +3,7 @@ import os
import numpy as np
import traceback
from promod3 import sidechain
from ost import io, seq, geom
from ost import io, seq, geom, mol
def _GetAmbigRMSD(r_one, r_two, fix_atoms, ambig_atom_pairs):
......@@ -97,6 +97,7 @@ class AAEval:
self.num_correct_chi2_given_chi1 = 0.0
self.rmsd_values = list()
self.num_residues = 0.0
self.num_clashing = 0.0
def GetAAEvaluations(model_dir, target_dir = 'scwrl_testset',
......@@ -106,6 +107,7 @@ def GetAAEvaluations(model_dir, target_dir = 'scwrl_testset',
'PRO','SER','THR','TRP','TYR','VAL']
eval_data = {aa : AAEval() for aa in AA}
testset_ids = set(f[:4] for f in os.listdir(target_dir))
ff = mol.mm.LoadCHARMMForcefield()
for target_idx, t in enumerate(testset_ids):
try:
target_path = os.path.join(target_dir, t + '-molcked.pdb')
......@@ -175,6 +177,45 @@ def GetAAEvaluations(model_dir, target_dir = 'scwrl_testset',
rm = GetRMSD(r_t, r_m)
if rm == rm:
eval_data[r_t.GetName()].rmsd_values.append(rm)
# clash analysis is done at the very end as we need to do some
# renaming magic...
mol.mm.MMModeller.AssignGromacsNaming(model_structure.handle)
ff.AssignFFSpecificNames(model_structure.handle)
model_structure = model_structure.Select('peptide=true and ele!=H')
relevant_model_structure = model_structure.Select(relevant_query)
# we only count clashes of residues from unique chains but do that
# relative to the whole structure
for r in relevant_model_structure.residues:
residue_clashes = False
for a in r.atoms:
if residue_clashes:
break
if a.GetName() in ['N', 'CA', 'C', 'O', 'CB']:
continue # we're only interested in sidechain atoms
a_type = ff.GetAtomType(r.name, a.name)
close_atoms = model_structure.FindWithin(a.GetPos(), 4.0)
for b in close_atoms:
if a.residue.GetQualifiedName() != \
b.residue.GetQualifiedName():
if a.name == 'SG' and b.name == 'SG':
continue # skip disulfid bonds
b_type = ff.GetAtomType(b.residue.name, b.name)
lj = ff.GetLJ(a_type, b_type, False)
sigma = lj.GetParam()[0]
sigma *= 10 # nm -> A
d = geom.Distance(a.GetPos(), b.GetPos())
if d <= 0.6*sigma:
residue_clashes = True
break
if residue_clashes:
# we first need to undo renaming mess
rname = r.GetName()
if rname == "HSE":
rname = "HIS"
eval_data[rname].num_clashing += 1
except:
print('failed to evaluate target', t)
traceback.print_exc()
......@@ -192,18 +233,21 @@ def PrintPerformance(model_dir, only_table=True, csv_path=None):
total_num_residues = 0
total_correct_num_chi1 = 0
total_correct_num_chi2 = 0
total_num_clashing = 0
for key in eval_data.keys():
total_num_chi1 += eval_data[key].num_valid_chi1
total_num_chi2 += eval_data[key].num_valid_chi2
total_num_residues += eval_data[key].num_residues
total_correct_num_chi1 += eval_data[key].num_correct_chi1
total_correct_num_chi2 += eval_data[key].num_correct_chi2
total_num_clashing += eval_data[key].num_clashing
if not only_table:
print('total num valid chi1 angles: ', total_num_chi1)
print('fraction correct:', float(total_correct_num_chi1) / total_num_chi1)
print('total num valid chi2 angles: ', total_num_chi2)
print('fraction correct:', float(total_correct_num_chi2) / total_num_chi2)
print('total num clashing', total_num_clashing)
print('total num residues: ', total_num_residues)
for key in eval_data.keys():
......@@ -220,10 +264,11 @@ def PrintPerformance(model_dir, only_table=True, csv_path=None):
print('chi2 accuracy given chi1: ',
eval_data[key].num_correct_chi2_given_chi1/eval_data[key].num_valid_chi2_given_chi1,
eval_data[key].num_valid_chi2_given_chi1)
print("num clashing: ", eval_data[key].num_clashing)
# output for csv table
csv_out = list()
csv_out.append('AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD')
csv_out.append('AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD, n_clash')
AA = ['ARG','ASN','ASP','CYS','GLN','GLU','HIS','ILE','LEU','LYS','MET','PHE','PRO','SER','THR','TRP','TYR','VAL']
for aa in AA:
data = eval_data[aa]
......@@ -232,6 +277,7 @@ def PrintPerformance(model_dir, only_table=True, csv_path=None):
chi2_fraction = ' '
chi2_fraction_given_chi1 = ' '
rmsd = ' '
n_clash = str(int(data.num_clashing))
if data.num_valid_chi1 > 0:
chi1_fraction = '%.2f'%(100*float(data.num_correct_chi1)/data.num_valid_chi1)
if data.num_valid_chi2 > 0:
......@@ -240,7 +286,7 @@ def PrintPerformance(model_dir, only_table=True, csv_path=None):
chi2_fraction_given_chi1 = '%.2f'%(100*data.num_correct_chi2_given_chi1/data.num_valid_chi2_given_chi1)
if len(data.rmsd_values) > 0:
rmsd = '%.2f'%(np.mean(data.rmsd_values))
csv_out.append(','.join([aa, num, chi1_fraction, chi2_fraction, chi2_fraction_given_chi1, rmsd]))
csv_out.append(','.join([aa, num, chi1_fraction, chi2_fraction, chi2_fraction_given_chi1, rmsd, n_clash]))
if csv_path:
with open(csv_path, 'w') as fh:
......
......@@ -16,7 +16,7 @@ model_directories = ['models/promod/frm',
# SCWRL4 completely deletes the residue
# ProMod3 doesn't touch it and the correct sidechain remains in place
# This is unfair in favour of ProMod3, as the correct sidechain is evaluated.
# however, the effect on overall performance can considered to be neglectible...
# however, the effect on overall performance can be considered to be small...
for md in model_directories:
print('processing:', md)
......
AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD
ARG,3601,68.09,68.20,74.18,2.02
ASN,2875,76.17,39.76,47.49,0.75
ASP,4013,71.72,53.35,62.09,1.07
CYS,999,84.68, , ,0.22
GLN,2501,72.57,62.69,72.62,1.16
GLU,4611,65.80,62.18,69.28,1.59
HIS,1542,79.64,45.72,49.59,1.03
ILE,3964,91.60,84.06,86.97,0.34
LEU,6554,85.66,83.25,94.14,0.38
LYS,3819,68.29,73.21,76.88,1.23
MET,1406,78.73,67.07,76.60,0.81
PHE,2715,90.39,85.27,88.96,0.77
PRO,3230,79.01,78.20,98.79,0.17
SER,4101,59.33, , ,0.41
THR,3784,75.29, , ,0.38
TRP,979,83.55,71.50,81.05,1.26
TYR,2346,89.00,81.50,86.11,0.90
VAL,5018,87.23, , ,0.35
\ No newline at end of file
AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD, n_clash
ARG,3601,68.09,68.20,74.18,2.02,3
ASN,2875,76.17,39.76,47.49,0.75,0
ASP,4013,71.72,53.35,62.09,1.07,2
CYS,999,84.68, , ,0.22,1
GLN,2501,72.57,62.69,72.62,1.16,1
GLU,4611,65.80,62.18,69.28,1.59,1
HIS,1542,79.64,45.72,49.59,1.03,0
ILE,3964,91.60,84.06,86.97,0.34,0
LEU,6554,85.66,83.25,94.14,0.38,1
LYS,3819,68.29,73.21,76.88,1.23,0
MET,1406,78.73,67.07,76.60,0.81,1
PHE,2715,90.39,85.27,88.96,0.77,4
PRO,3230,79.01,78.20,98.79,0.17,4
SER,4101,59.33, , ,0.41,0
THR,3784,75.29, , ,0.38,0
TRP,979,83.55,71.50,81.05,1.26,4
TYR,2346,89.00,81.50,86.11,0.90,12
VAL,5018,87.23, , ,0.35,2
\ No newline at end of file
AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD
ARG,3601,66.98,69.98,74.71,2.07
ASN,2875,74.57,37.18,44.82,0.78
ASP,4013,70.15,50.83,59.68,1.13
CYS,999,84.38, , ,0.22
GLN,2501,71.57,61.22,70.06,1.19
GLU,4611,63.96,62.61,68.90,1.67
HIS,1542,78.08,40.53,44.44,1.09
ILE,3964,90.94,83.25,86.10,0.35
LEU,6554,84.82,83.06,94.39,0.40
LYS,3819,67.32,73.24,77.17,1.26
MET,1406,77.52,67.00,77.52,0.84
PHE,2715,86.37,79.52,85.12,0.91
PRO,3230,78.30,77.43,98.73,0.17
SER,4101,58.55, , ,0.41
THR,3784,74.71, , ,0.38
TRP,979,80.59,66.09,75.03,1.45
TYR,2346,85.51,79.28,85.14,1.07
VAL,5018,87.35, , ,0.35
\ No newline at end of file
AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD, n_clash
ARG,3601,66.98,69.98,74.71,2.07,10
ASN,2875,74.57,37.18,44.82,0.78,2
ASP,4013,70.15,50.83,59.68,1.13,4
CYS,999,84.38, , ,0.22,2
GLN,2501,71.57,61.22,70.06,1.19,1
GLU,4611,63.96,62.61,68.90,1.67,3
HIS,1542,78.08,40.53,44.44,1.09,6
ILE,3964,90.94,83.25,86.10,0.35,7
LEU,6554,84.82,83.06,94.39,0.40,19
LYS,3819,67.32,73.24,77.17,1.26,2
MET,1406,77.52,67.00,77.52,0.84,3
PHE,2715,86.37,79.52,85.12,0.91,17
PRO,3230,78.30,77.43,98.73,0.17,6
SER,4101,58.55, , ,0.41,0
THR,3784,74.71, , ,0.38,3
TRP,979,80.59,66.09,75.03,1.45,21
TYR,2346,85.51,79.28,85.14,1.07,64
VAL,5018,87.35, , ,0.35,5
\ No newline at end of file
AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD
ARG,3601,73.67,68.65,73.80,1.96
ASN,2875,83.34,49.04,56.43,0.63
ASP,4013,82.01,61.50,70.86,0.77
CYS,999,87.89, , ,0.18
GLN,2501,75.77,63.97,72.03,1.11
GLU,4611,70.11,62.91,70.40,1.49
HIS,1542,84.95,48.57,52.21,0.89
ILE,3964,95.86,86.28,88.05,0.26
LEU,6554,88.74,85.96,94.77,0.33
LYS,3819,74.94,74.00,77.88,1.12
MET,1406,81.58,72.05,80.38,0.74
PHE,2715,94.00,87.62,90.56,0.61
PRO,3230,80.93,80.00,98.81,0.14
SER,4101,69.06, , ,0.32
THR,3784,89.27, , ,0.21
TRP,979,89.68,76.51,83.37,0.99
TYR,2346,92.16,84.83,88.39,0.74
VAL,5018,93.08, , ,0.25
\ No newline at end of file
AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD, n_clash
ARG,3601,73.67,68.65,73.80,1.96,6
ASN,2875,83.34,49.04,56.43,0.63,1
ASP,4013,82.01,61.50,70.86,0.77,3
CYS,999,87.89, , ,0.18,2
GLN,2501,75.77,63.97,72.03,1.11,1
GLU,4611,70.11,62.91,70.40,1.49,0
HIS,1542,84.95,48.57,52.21,0.89,2
ILE,3964,95.86,86.28,88.05,0.26,3
LEU,6554,88.74,85.96,94.77,0.33,6
LYS,3819,74.94,74.00,77.88,1.12,0
MET,1406,81.58,72.05,80.38,0.74,0
PHE,2715,94.00,87.62,90.56,0.61,7
PRO,3230,80.93,80.00,98.81,0.14,4
SER,4101,69.06, , ,0.32,1
THR,3784,89.27, , ,0.21,1
TRP,979,89.68,76.51,83.37,0.99,3
TYR,2346,92.16,84.83,88.39,0.74,13
VAL,5018,93.08, , ,0.25,1
\ No newline at end of file
AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD
ARG,3601,73.76,71.26,76.05,1.97
ASN,2875,83.17,49.15,56.71,0.63
ASP,4013,82.28,61.48,70.32,0.77
CYS,999,87.69, , ,0.18
GLN,2501,76.33,63.89,71.50,1.11
GLU,4611,70.07,65.26,72.79,1.48
HIS,1542,85.02,48.44,51.79,0.89
ILE,3964,95.71,86.15,87.98,0.26
LEU,6554,88.59,85.92,94.64,0.33
LYS,3819,75.18,74.18,77.88,1.13
MET,1406,81.93,72.12,80.03,0.75
PHE,2715,92.04,87.11,90.16,0.67
PRO,3230,80.93,80.00,98.81,0.14
SER,4101,69.03, , ,0.32
THR,3784,89.19, , ,0.21
TRP,979,87.54,75.69,82.03,1.07
TYR,2346,90.58,84.78,88.33,0.80
VAL,5018,93.20, , ,0.24
\ No newline at end of file
AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD, n_clash
ARG,3601,73.76,71.26,76.05,1.97,156
ASN,2875,83.17,49.15,56.71,0.63,9
ASP,4013,82.28,61.48,70.32,0.77,38
CYS,999,87.69, , ,0.18,2
GLN,2501,76.33,63.89,71.50,1.11,15
GLU,4611,70.07,65.26,72.79,1.48,83
HIS,1542,85.02,48.44,51.79,0.89,17
ILE,3964,95.71,86.15,87.98,0.26,13
LEU,6554,88.59,85.92,94.64,0.33,13
LYS,3819,75.18,74.18,77.88,1.13,15
MET,1406,81.93,72.12,80.03,0.75,4
PHE,2715,92.04,87.11,90.16,0.67,18
PRO,3230,80.93,80.00,98.81,0.14,3
SER,4101,69.03, , ,0.32,6
THR,3784,89.19, , ,0.21,4
TRP,979,87.54,75.69,82.03,1.07,25
TYR,2346,90.58,84.78,88.33,0.80,118
VAL,5018,93.20, , ,0.24,4
\ No newline at end of file
AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD
ARG,3601,71.79,68.93,73.93,2.03
ASN,2875,81.81,46.09,53.66,0.66
ASP,4013,80.36,58.58,68.74,0.83
CYS,999,87.99, , ,0.18
GLN,2501,75.53,63.13,70.62,1.14
GLU,4611,68.08,63.70,71.39,1.56
HIS,1542,84.18,44.36,48.07,0.93
ILE,3964,95.23,85.19,87.05,0.28
LEU,6554,87.46,84.67,94.49,0.35
LYS,3819,73.61,73.42,77.20,1.16
MET,1406,81.01,70.70,79.10,0.78
PHE,2715,91.42,82.80,86.62,0.70
PRO,3230,80.25,79.29,98.77,0.14
SER,4101,69.06, , ,0.32
THR,3784,89.16, , ,0.21
TRP,979,87.23,71.60,78.81,1.17
TYR,2346,89.09,81.80,87.03,0.89
VAL,5018,93.12, , ,0.25
\ No newline at end of file
AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD, n_clash
ARG,3601,71.79,68.93,73.93,2.03,11
ASN,2875,81.81,46.09,53.66,0.66,0
ASP,4013,80.36,58.58,68.74,0.83,6
CYS,999,87.99, , ,0.18,3
GLN,2501,75.53,63.13,70.62,1.14,1
GLU,4611,68.08,63.70,71.39,1.56,4
HIS,1542,84.18,44.36,48.07,0.93,7
ILE,3964,95.23,85.19,87.05,0.28,11
LEU,6554,87.46,84.67,94.49,0.35,20
LYS,3819,73.61,73.42,77.20,1.16,1
MET,1406,81.01,70.70,79.10,0.78,5
PHE,2715,91.42,82.80,86.62,0.70,23
PRO,3230,80.25,79.29,98.77,0.14,3
SER,4101,69.06, , ,0.32,1
THR,3784,89.16, , ,0.21,6
TRP,979,87.23,71.60,78.81,1.17,14
TYR,2346,89.09,81.80,87.03,0.89,61
VAL,5018,93.12, , ,0.25,3
\ No newline at end of file
AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD
ARG,3601,74.23,71.20,76.81,1.93
ASN,2875,81.60,47.62,55.24,0.67
ASP,4012,81.56,60.82,70.17,0.82
CYS,999,87.99, , ,0.20
GLN,2501,75.37,63.97,72.41,1.13
GLU,4611,69.29,65.73,73.49,1.49
HIS,1542,83.72,45.59,49.26,0.95
ILE,3964,95.31,84.76,86.69,0.27
LEU,6554,87.60,85.26,94.79,0.36
LYS,3819,74.31,74.10,77.80,1.14
MET,1406,80.94,72.33,79.88,0.78
PHE,2715,92.08,85.56,87.96,0.72
PRO,3230,80.80,79.41,98.20,0.14
SER,4101,69.11, , ,0.33
THR,3784,89.48, , ,0.21
TRP,979,87.44,76.00,82.48,1.10
TYR,2346,90.49,83.08,86.62,0.86
VAL,5018,92.63, , ,0.26
\ No newline at end of file
AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD, n_clash
ARG,3601,74.23,71.20,76.81,1.93,159
ASN,2875,81.60,47.62,55.24,0.67,7
ASP,4012,81.56,60.82,70.17,0.82,34
CYS,999,87.99, , ,0.20,1
GLN,2501,75.37,63.97,72.41,1.13,18
GLU,4611,69.29,65.73,73.49,1.49,90
HIS,1542,83.72,45.59,49.26,0.95,25
ILE,3964,95.31,84.76,86.69,0.27,9
LEU,6554,87.60,85.26,94.79,0.36,21
LYS,3819,74.31,74.10,77.80,1.14,18
MET,1406,80.94,72.33,79.88,0.78,1
PHE,2715,92.08,85.56,87.96,0.72,22
PRO,3230,80.80,79.41,98.20,0.14,6
SER,4101,69.11, , ,0.33,5
THR,3784,89.48, , ,0.21,11
TRP,979,87.44,76.00,82.48,1.10,21
TYR,2346,90.49,83.08,86.62,0.86,104
VAL,5018,92.63, , ,0.26,4
\ No newline at end of file
AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD
ARG,3601,72.06,68.29,73.87,2.02
ASN,2875,80.87,46.75,54.75,0.68
ASP,4012,79.49,57.63,67.89,0.88
CYS,999,87.59, , ,0.20
GLN,2501,74.09,63.17,71.40,1.16
GLU,4611,67.53,64.32,72.03,1.55
HIS,1542,83.46,42.48,46.15,0.97
ILE,3964,94.75,83.70,85.92,0.28
LEU,6554,86.41,83.98,94.40,0.38
LYS,3819,72.72,73.13,76.99,1.18
MET,1406,79.23,70.77,78.82,0.83
PHE,2715,90.87,82.10,85.37,0.77
PRO,3230,79.72,78.27,98.10,0.15
SER,4101,67.93, , ,0.34
THR,3784,88.82, , ,0.22
TRP,979,85.90,68.74,76.10,1.23
TYR,2346,88.49,80.18,85.02,0.97
VAL,5018,92.07, , ,0.27
\ No newline at end of file
AA,num,X1 correct (%),X2 correct (%),X2 correct given X1 (%),avg RMSD, n_clash
ARG,3601,72.06,68.29,73.87,2.02,10
ASN,2875,80.87,46.75,54.75,0.68,3
ASP,4012,79.49,57.63,67.89,0.88,7
CYS,999,87.59, , ,0.20,4
GLN,2501,74.09,63.17,71.40,1.16,2
GLU,4611,67.53,64.32,72.03,1.55,4
HIS,1542,83.46,42.48,46.15,0.97,11
ILE,3964,94.75,83.70,85.92,0.28,11
LEU,6554,86.41,83.98,94.40,0.38,25
LYS,3819,72.72,73.13,76.99,1.18,2
MET,1406,79.23,70.77,78.82,0.83,6
PHE,2715,90.87,82.10,85.37,0.77,28
PRO,3230,79.72,78.27,98.10,0.15,8
SER,4101,67.93, , ,0.34,0
THR,3784,88.82, , ,0.22,5
TRP,979,85.90,68.74,76.10,1.23,19
TYR,2346,88.49,80.18,85.02,0.97,61
VAL,5018,92.07, , ,0.27,4
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment