diff --git a/actions/ost-compare-structures b/actions/ost-compare-structures index 2e685cd685bc894dca4d4858885acc1d9b1a4d42..97105e399c8a1a5dc0da1a77aa132c2ecf4d97cb 100644 --- a/actions/ost-compare-structures +++ b/actions/ost-compare-structures @@ -380,6 +380,17 @@ def _MolckEntity(entity, options): Molck(entity, lib, ms) +def _AveragelDDT(scorers): + scores = [s.global_score for s in scorers] + weights = [s.total_contacts for s in scorers] + nominator = sum([s * w for s, w in zip(scores, weights)]) + denominator = sum(weights) + if denominator > 0: + return nominator / float(denominator) + else: + return 0.0 + + def _Main(): """Do the magic.""" @@ -418,21 +429,20 @@ def _Main(): model_name, reference_name)) qs_scorer = qsscoring.QSscorer(reference, model) + if opts.chain_mapping is not None: + ost.LogInfo( + "Using custom chain mapping: %s" % str( + opts.chain_mapping)) + qs_scorer.chain_mapping = opts.chain_mapping if opts.qs_score: ost.LogInfo("Computing QS-score") try: - if opts.chain_mapping is not None: - ost.LogInfo( - "Using custom chain mapping: %s" % str( - opts.chain_mapping)) - qs_scorer.chain_mapping = opts.chain_mapping reference_results["qs_score"] = { "status": "SUCCESS", "error": "", "model_name": model_name, "reference_name": reference_name, "global_score": qs_scorer.global_score, - "oligo_lddt_score": qs_scorer.lddt_score, "best_score": qs_scorer.best_score, "chain_mapping": qs_scorer.chain_mapping } @@ -445,7 +455,6 @@ def _Main(): "model_name": model_name, "reference_name": reference.GetName(), "global_score": 0.0, - "oligo_lddt_score": 0.0, "best_score": 0.0, "chain_mapping": None } @@ -468,8 +477,9 @@ def _Main(): opts.parameter_file) if opts.verbosity > 3: lddt_settings.PrintParameters() - # Perform scoring + # Perform single chain scoring # Get chains from mapped alignments + lddt_scorers = list() for aln in qs_scorer.alignments: # Get chains and renumber according to alignment (for lDDT) ch_ref = aln.GetSequence(0).GetName() @@ -483,10 +493,54 @@ def _Main(): model=model, settings=lddt_settings, stereochemical_params=stereochemical_parameters) - lddt_results["single_chain_lddt"].append({ - "model_chain": ch_mdl, - "reference_chain": ch_ref, - "global_score": lddt_scorer.global_score}) + try: + lddt_results["single_chain_lddt"].append({ + "status": "SUCCESS", + "error": "", + "model_chain": ch_mdl, + "reference_chain": ch_ref, + "global_score": lddt_scorer.global_score, + "conserved_contacts": lddt_scorer.conserved_contacts, + "total_contacts": lddt_scorer.total_contacts}) + except Exception as ex: + ost.LogError('Single chain lDDT failed:', str(ex)) + lddt_results["single_chain_lddt"].append({ + "status": "FAILURE", + "error": str(ex), + "model_chain": ch_mdl, + "reference_chain": ch_ref, + "global_score": 0.0, + "conserved_contacts": 0.0, + "total_contacts": 0.0}) + lddt_scorers.append(lddt_scorer) + # perform oligo lddt scoring + try: + oligo_lddt = qsscoring.ComputeOligoLDDT( + qs_scorer.qs_ent_1.ent, + qs_scorer.qs_ent_2.ent, + qs_scorer.alignments, + qs_scorer.calpha_only) + lddt_results["oligo_lddt"] = { + "status": "SUCCESS", + "error": "", + "global_score": oligo_lddt} + except Exception as ex: + ost.LogError('Oligo lDDT failed:', str(ex)) + lddt_results["oligo_lddt"] = { + "status": "FAILURE", + "error": str(ex), + "global_score": 0.0} + try: + weighted_lddt = _AveragelDDT(lddt_scorers) + lddt_results["weighted_lddt"] = { + "status": "SUCCESS", + "error": "", + "global_score": weighted_lddt} + except Exception as ex: + lddt_results["weighted_lddt"] = { + "status": "FAILURE", + "error": str(ex), + "global_score": 0.0} reference_results["lddt"] = lddt_results model_results[reference_name] = reference_results result["result"][model_name] = model_results diff --git a/modules/mol/alg/pymod/qsscoring.py b/modules/mol/alg/pymod/qsscoring.py index 16635d7025e330345dfc20109cf19fd63ffc4257..2b914893ca8c624d0d8eae7176dfe9576e9bd18a 100644 --- a/modules/mol/alg/pymod/qsscoring.py +++ b/modules/mol/alg/pymod/qsscoring.py @@ -161,9 +161,6 @@ class QSscorer: self._global_score = None self._best_score = None self._superposition = None - self._lddt_score = None - self._lddt_mdl = None - self._lddt_ref = None self._clustalw_bin = None @property @@ -434,65 +431,6 @@ class QSscorer: % (cmp_view.residue_count, cmp_view.chain_count, sup_rmsd)) return self._superposition - @property - def lddt_score(self): - """The multi-chain lDDT score. - - .. note:: - - lDDT is not considering over-prediction (i.e. extra chains) and hence is - not symmetric. Here, we consider :attr:`qs_ent_1` as the reference and - :attr:`qs_ent_2` as the model. The alignments from :attr:`alignments` are - used to map residue numbers and chains. - - The score is computed with OST's :func:`~ost.mol.alg.LocalDistDiffTest` - function with a single distance threshold of 2 A and an inclusion radius of - 8 A. You can use :attr:`lddt_mdl` and :attr:`lddt_ref` to get entities on - which you can call any other lDDT function with any other set of parameters. - - :getter: Computed on first use (cached) - :type: :class:`float` - """ - if self._lddt_score is None: - self._ComputeLDDT() - return self._lddt_score - - @property - def lddt_mdl(self): - """The model entity used for lDDT scoring (:attr:`lddt_score`) and annotated - with local scores. - - Local scores are available as residue properties named 'lddt' and on each - atom as a B-factor. Only CA atoms are considered if :attr:`calpha_only` is - True, otherwise this is an all-atom score. - - Since, the lDDT computation requires a single chain with mapped residue - numbering, all chains are appended into a single chain X with unique residue - numbers according to the column-index in the alignment. The alignments are - in the same order as they appear in :attr:`alignments`. Additional residues - are appended at the end of the chain with unique residue numbers. - - :getter: Computed on first use (cached) - :type: :class:`~ost.mol.EntityHandle` - """ - if self._lddt_mdl is None: - self._ComputeLDDT() - return self._lddt_mdl - - @property - def lddt_ref(self): - """The reference entity used for lDDT scoring (:attr:`lddt_score`). - - This is a single chain X with residue numbers matching ones in - :attr:`lddt_mdl` where aligned and unique numbers for additional residues. - - :getter: Computed on first use (cached) - :type: :class:`~ost.mol.EntityHandle` - """ - if self._lddt_ref is None: - self._ComputeLDDT() - return self._lddt_ref - @property def clustalw_bin(self): """ @@ -537,6 +475,8 @@ class QSscorer: def _ComputeScores(self): """Fills cached global_score and best_score.""" + if self.qs_ent_1.is_monomer or self.qs_ent_2.is_monomer: + raise QSscoreError("QS-score is not defined for monomers") # get contacts if self.calpha_only: contacts_1 = self.qs_ent_1.contacts_ca @@ -554,22 +494,6 @@ class QSscorer: % (self.qs_ent_1.GetName(), self.qs_ent_2.GetName(), self._best_score, self._global_score)) - def _ComputeLDDT(self): - """Fills cached lddt_score, lddt_mdl and lddt_ref.""" - LogInfo('Computing lDDT score') - # check reference and model - ref, mdl = self.qs_ent_1.ent, self.qs_ent_2.ent - LogInfo('Reference %s has: %s chains' % (ref.GetName(), ref.chain_count)) - LogInfo('Model %s has: %s chains' % (mdl.GetName(), mdl.chain_count)) - if mdl.chain_count > ref.chain_count: - LogWarning('MODEL contains more chains than REFERENCE, ' - 'lDDT is not considering them') - # get single chain reference and model - self._lddt_ref, self._lddt_mdl = \ - _MergeAlignedChains(self.alignments, ref, mdl, self.calpha_only) - # score them (mdl and ref changed) and keep results - self._lddt_score = _ComputeLDDTScore(self._lddt_ref, self._lddt_mdl) - ############################################################################### # Entity with cached entries for QS scoring @@ -627,6 +551,7 @@ class QSscoreEntity(object): def __init__(self, ent): # copy entity and process/clean it self.original_name = ent.GetName() + self.is_monomer = False ent = mol.CreateEntityFromView(ent.Select('ele!=H and aname!=HN'), True) if not conop.GetDefaultLib(): raise RuntimeError("QSscore computation requires a compound library!") @@ -639,9 +564,9 @@ class QSscoreEntity(object): 'removing water, ligands and small peptides.') self.is_valid = False elif self.ent.chain_count == 1: - LogError('Structure ' + ent.GetName() + ' is a monomer. ' - 'QSscore is not defined for monomers.') - self.is_valid = False + LogWarning('Structure ' + ent.GetName() + ' is a monomer.') + self.is_valid = True + self.is_monomer = True else: self.is_valid = True # init cached stuff @@ -899,6 +824,24 @@ def GetContacts(entity, calpha_only, dist_thr=12.0): return contacts +def ComputeOligoLDDT(ref, mdl, alignments, calpha_only): + """Fills cached lddt_score, lddt_mdl and lddt_ref.""" + LogInfo('Computing lDDT score') + LogInfo('Reference %s has: %s chains' % (ref.GetName(), ref.chain_count)) + LogInfo('Model %s has: %s chains' % (mdl.GetName(), mdl.chain_count)) + if mdl.chain_count > ref.chain_count: + LogWarning('MODEL contains more chains than REFERENCE, ' + 'lDDT is not considering them') + # get single chain reference and model + lddt_ref, lddt_mdl = _MergeAlignedChains(alignments, + ref, + mdl, + calpha_only) + # score them (mdl and ref changed) and keep results + oligo_lddt_score = _ComputeLDDTScore(lddt_ref, lddt_mdl) + return oligo_lddt_score + + ############################################################################### # HELPERS ############################################################################### @@ -1130,8 +1073,8 @@ def _GetChemGroupsMapping(qs_ent_1, qs_ent_2): # check if we have any chains left LogInfo('Chemical chain-groups mapping: ' + str(chem_mapping)) - if len(mapped_1) < 2 or len(mapped_2) < 2: - raise QSscoreError('Less than 2 chains left in chem_mapping.') + if len(mapped_1) < 1 or len(mapped_2) < 1: + raise QSscoreError('Less than 1 chains left in chem_mapping.') return chem_mapping def _SelectFew(l, max_elements): @@ -2401,4 +2344,4 @@ def _ComputeLDDTScore(ref, mdl): # specify public interface __all__ = ('QSscoreError', 'QSscorer', 'QSscoreEntity', 'FilterContacts', - 'GetContacts') + 'GetContacts', 'ComputeOligoLDDT') diff --git a/modules/mol/alg/pymod/wrap_mol_alg.cc b/modules/mol/alg/pymod/wrap_mol_alg.cc index d4a7c208c0d518bb08141a4b2f5cd34c788c802b..6ec8be1c33a46af1b8e1353476e1e399b9778fc7 100644 --- a/modules/mol/alg/pymod/wrap_mol_alg.cc +++ b/modules/mol/alg/pymod/wrap_mol_alg.cc @@ -426,8 +426,8 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) .def("__init__", raw_function(lDDTScorerInitWrapper)) .def(init<std::vector<mol::EntityHandle>&, mol::EntityHandle&, mol::alg::lDDTSettings&, mol::alg::StereoChemicalProps&>()) .add_property("global_score", &mol::alg::lDDTScorer::GetGlobalScore) - .add_property("conserved_residues", &mol::alg::lDDTScorer::GetNumConservedResidues) - .add_property("total_residues", &mol::alg::lDDTScorer::GetNumTotalResidues) + .add_property("conserved_contacts", &mol::alg::lDDTScorer::GetNumConservedContacts) + .add_property("total_contacts", &mol::alg::lDDTScorer::GetNumTotalContacts) .def("PrintPerResidueStats", &mol::alg::lDDTScorer::PrintPerResidueStats) .add_property("local_scores", &get_local_scores_wrapper) .add_property("is_valid", &mol::alg::lDDTScorer::IsValid); diff --git a/modules/mol/alg/src/local_dist_diff_test.cc b/modules/mol/alg/src/local_dist_diff_test.cc index d02f41a32e2d89a7ba1c2255b6e4d4c0033557c2..ff745714d213692be233e011319cc1ebb3908c6c 100644 --- a/modules/mol/alg/src/local_dist_diff_test.cc +++ b/modules/mol/alg/src/local_dist_diff_test.cc @@ -527,8 +527,8 @@ lDDTScorer::lDDTScorer(std::vector<EntityHandle>& init_references, _score_calculated(false), _score_valid(false), _has_local_scores(false), - _num_con_res(-1), - _num_tot_res(-1), + _num_cons_con(-1), + _num_tot_con(-1), _global_score(-1.0) { model_view = model.GetChainList()[0].Select("peptide=true"); @@ -566,8 +566,8 @@ lDDTScorer::lDDTScorer(std::vector<EntityHandle>& init_references, _score_calculated(false), _score_valid(false), _has_local_scores(false), - _num_con_res(-1), - _num_tot_res(-1), + _num_cons_con(-1), + _num_tot_con(-1), _global_score(-1.0) { model_view = model.GetChainList()[0].Select("peptide=true"); _PrepareReferences(); @@ -600,18 +600,18 @@ Real lDDTScorer::GetGlobalScore(){ return _global_score; } -int lDDTScorer::GetNumConservedResidues(){ +int lDDTScorer::GetNumConservedContacts(){ if (!_score_calculated) { _ComputelDDT(); } - return _num_con_res; + return _num_cons_con; } -int lDDTScorer::GetNumTotalResidues(){ +int lDDTScorer::GetNumTotalContacts(){ if (!_score_calculated) { _ComputelDDT(); } - return _num_tot_res; + return _num_tot_con; } std::vector<lDDTLocalScore> lDDTScorer::GetLocalScores(){ @@ -685,8 +685,8 @@ void lDDTScorer::_ComputelDDT(){ if (cov.first == 0) { std::cout << "Global LDDT score: 0.0" << std::endl; - _num_tot_res = 0; - _num_con_res = 0; + _num_tot_con = 0; + _num_cons_con = 0; _global_score = 0.0; _score_calculated = true; _score_valid = false; @@ -697,8 +697,8 @@ void lDDTScorer::_ComputelDDT(){ std::cout << "Global LDDT score: " << std::setprecision(4) << lddt << std::endl; std::cout << "(" << std::fixed << total_ov.first << " conserved distances out of " << total_ov.second << " checked, over " << settings.cutoffs.size() << " thresholds)" << std::endl; - _num_tot_res = total_ov.second ? total_ov.second : 1; - _num_con_res = total_ov.first; + _num_tot_con = total_ov.second ? total_ov.second : 1; + _num_cons_con = total_ov.first; _global_score = lddt; _score_calculated = true; _score_valid = true; diff --git a/modules/mol/alg/src/local_dist_diff_test.hh b/modules/mol/alg/src/local_dist_diff_test.hh index ebcf71579a838422b8c5a80b35c3b66ac8c36084..cf77b6c495e9afab94dc4c3bcecd044663bedeba 100644 --- a/modules/mol/alg/src/local_dist_diff_test.hh +++ b/modules/mol/alg/src/local_dist_diff_test.hh @@ -112,8 +112,8 @@ class lDDTScorer StereoChemicalProps& init_stereochemical_params); Real GetGlobalScore(); std::vector<lDDTLocalScore> GetLocalScores(); - int GetNumConservedResidues(); // number of conserved distances in the model - int GetNumTotalResidues(); // the number of total distances in the reference structure + int GetNumConservedContacts(); // number of conserved distances in the model + int GetNumTotalContacts(); // the number of total distances in the reference structure void PrintPerResidueStats(); bool IsValid(); @@ -123,8 +123,8 @@ class lDDTScorer bool _has_local_scores; // number of conserved distances in the model and // the number of total distances in the reference structure - int _num_con_res; - int _num_tot_res; + int _num_cons_con; + int _num_tot_con; Real _global_score; std::vector<lDDTLocalScore> _local_scores; void _ComputelDDT();