SCHWED-1612: Documented new scoring functionalities for loop candidates.

6b899e02 · Gerardo Tauriello · 5f22e431 · 6b899e02 · 6b899e02 · 6b899e02
Commit 6b899e02 authored 8 years ago by Gerardo Tauriello
--- a/doc/tests/CMakeLists.txt
+++ b/doc/tests/CMakeLists.txt
@@ -46,6 +46,7 @@ set (DOC_TEST_SCRIPTS
  scripts/modelling_model_termini.py
  scripts/modelling_monte_carlo.py
  scripts/modelling_loop_candidates.py
+  scripts/modelling_loop_scoring.py

  scripts/sidechain_reconstruct.py
  scripts/sidechain_reconstructor.py

--- a/doc/tests/scripts/modelling_loop_scoring.py
+++ b/doc/tests/scripts/modelling_loop_scoring.py
+from ost import io, seq
+from promod3 import modelling, loop
+
+# setup raw model
+tpl = io.LoadPDB('data/1crn_cut.pdb')
+seq_trg = 'TTCCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGDYAN'
+seq_tpl = 'TTCCPSIVARSNFNVCRLPGTPEA------GCIIIPGATCPGDYAN'
+aln = seq.CreateAlignment(seq.CreateSequence('trg', seq_trg),
+                          seq.CreateSequence('tpl', seq_tpl))
+aln.AttachView(1, tpl.CreateFullView())
+mhandle = modelling.BuildRawModel(aln)
+print("Number of gaps in raw model: %d" % len(mhandle.gaps))
+
+# setup default scorers for modelling handle
+modelling.SetupDefaultBackboneScoring(mhandle)
+modelling.SetupDefaultAllAtomScoring(mhandle)
+
+# setup databases
+frag_db = loop.LoadFragDB()
+structure_db = loop.LoadStructureDB()
+torsion_sampler = loop.LoadTorsionSamplerCoil()
+
+# get data for gap to close
+gap = mhandle.gaps[0].Copy()
+print("Gap to close: %s" % str(gap))
+n_stem = gap.before
+c_stem = gap.after
+start_resnum = n_stem.GetNumber().GetNum()
+start_idx = start_resnum - 1   # res. num. starts at 1
+
+# get loop candidates from FragDB
+candidates = modelling.LoopCandidates.FillFromDatabase(\
+                n_stem, c_stem, gap.full_seq, frag_db, structure_db)
+print("Number of loop candidates: %d" % len(candidates))
+
+# all scores will be kept in a score container which we update
+all_scores = modelling.ScoreContainer()
+# the keys used to identify scores are globally defined
+print("Stem RMSD key = '%s'" \
+      % modelling.ScoringWeights.GetStemRMSDsKey())
+print("Profile keys = ['%s', '%s']" \
+      % (modelling.ScoringWeights.GetSequenceProfileScoresKey(),
+         modelling.ScoringWeights.GetStructureProfileScoresKey()))
+print("Backbone scoring keys = %s" \
+      % str(modelling.ScoringWeights.GetBackboneScoringKeys()))
+print("All atom scoring keys = %s" \
+      % str(modelling.ScoringWeights.GetAllAtomScoringKeys()))
+
+# get stem RMSDs for each candidate (i.e. how well does it fit?)
+# -> this must be done before CCD to be meaningful
+candidates.CalculateStemRMSDs(all_scores, n_stem, c_stem)
+
+# close the candidates with CCD
+orig_indices = candidates.ApplyCCD(n_stem, c_stem, torsion_sampler)
+print("Number of closed loop candidates: %d" % len(candidates))
+
+# get subset of previously computed scores
+all_scores = all_scores.Extract(orig_indices)
+
+# add profile scores (needs profile for target sequence)
+prof = io.LoadSequenceProfile("data/1CRNA.hhm")
+candidates.CalculateSequenceProfileScores(all_scores, structure_db,
+                                          prof, start_idx)
+candidates.CalculateStructureProfileScores(all_scores, structure_db,
+                                           prof, start_idx)
+# add backbone scores
+scorer = mhandle.backbone_scorer
+candidates.CalculateBackboneScores(all_scores, scorer, start_resnum)
+# add all atom scores
+candidates.CalculateAllAtomScores(all_scores, mhandle, start_resnum)
+
+# use default weights to combine scores
+weights = modelling.ScoringWeights.GetWeights(with_db=True,
+                                              with_aa=True)
+scores = all_scores.LinearCombine(weights)
+
+# rank them (best = lowest "score")
+arg_sorted_scores = sorted([(v,i) for i,v in enumerate(scores)])
+print("Ranked candidates: score, index")
+for v,i in arg_sorted_scores:
+  print("%g, %d" % (v,i))
+
+# insert best into model, update scorers and clear gaps
+best_candidate = candidates[arg_sorted_scores[0][1]]
+modelling.InsertLoopClearGaps(mhandle, best_candidate, gap)
+print("Number of gaps in closed model: %d" % len(mhandle.gaps))
+io.SavePDB(mhandle.model, "model.pdb")
--- a/doc/tests/test_doctests.py
+++ b/doc/tests/test_doctests.py
@@ -359,6 +359,14 @@ class DocTests(unittest.TestCase):
        # clean up
        os.remove('modified_crambin.pdb')

+    def testModellingLoopScoring(self):
+        # run it
+        self.checkPMRun('modelling_loop_scoring.py', [], 0)
+        # check that result exists and is readable
+        io.LoadPDB('model.pdb')
+        # clean up
+        os.remove('model.pdb')
+
    ################################################################

    def testSidechainReconstruct(self):

--- a/modelling/doc/loop_candidates.rst
+++ b/modelling/doc/loop_candidates.rst
@@ -176,9 +176,47 @@ The LoopCandidates class
             useful to keep track of scores and other data extracted before.
    :rtype:  :class:`list` of :class:`int`

-
-  .. method:: CalculateSequenceProfileScores(structure_db, prof, offset=0)
-              CalculateStructureProfileScores(structure_db, prof, offset=0)
+  .. method:: CalculateBackboneScores(score_container, scorer, \
+                                      start_resnum, chain_idx=0)
+              CalculateBackboneScores(score_container, scorer, keys, \
+                                      start_resnum, chain_idx=0)
+              CalculateAllAtomScores(score_container, mhandle, \
+                                     start_resnum, chain_idx=0)
+              CalculateAllAtomScores(score_container, mhandle, keys, \
+                                     start_resnum, chain_idx=0)
+
+    Calculate backbone / all-atom scores for each loop candidate.
+    Note that (unless otherwise noted) a lower "score" is better!
+
+    The computed scores are in the same order as the candidates in here.
+
+    :param score_container: Add scores to this score container using the given
+                            key names (or the ones from :class:`ScoringWeights`)
+    :type score_container:  :class:`ScoreContainer`
+    :param scorer: Backbone scoring object with set environment for the
+                   particular loop modelling problem.
+    :type scorer:  :class:`~promod3.scoring.BackboneOverallScorer`
+    :param mhandle: Modelling handle set up for all atom scoring (see
+                    :func:`SetupDefaultAllAtomScoring`).
+    :type mhandle:  :class:`ModellingHandle`
+    :param keys: Keys of the desired scorers. If not given, we use the set of
+                 keys given by :meth:`ScoringWeights.GetBackboneScoringKeys` /
+                 :meth:`ScoringWeights.GetAllAtomScoringKeys`.
+    :type keys:  :class:`list` of :class:`str`
+    :param start_resnum: Res. number defining the position in the SEQRES.
+    :type start_resnum:  :class:`int`
+    :param chain_idx: Index of chain the loops belong to.
+    :type chain_idx:  :class:`int`
+
+    :raises: :exc:`~exceptions.RuntimeError` if :func:`IsAllAtomScoringSetUp`
+             is False, if *keys* has a key for which no scorer exists or if
+             anything is raised when calculating the scores.
+
+
+  .. method:: CalculateSequenceProfileScores(score_container, structure_db, \
+                                             prof, offset=0)
+              CalculateStructureProfileScores(score_container, structure_db, \
+                                              prof, offset=0)

    Calculates a score comparing the given profile *prof* starting at *offset*
    with the sequence / structure profile of each candidate as extracted from
@@ -192,6 +230,11 @@ The LoopCandidates class
    given *structure_db* (e.g. :meth:`FillFromDatabase` must have been called
    with this DB).

+    The computed scores are in the same order as the candidates in here.
+
+    :param score_container: Add scores to this score container using the default
+                            key name defined in :class:`ScoringWeights`
+    :type score_container:  :class:`ScoreContainer`
    :param structural_db: Structural database used in :meth:`FillFromDatabase`
    :type structural_db:  :class:`~promod3.loop.StructureDB`
    :param prof: Profile information for target.
@@ -199,17 +242,13 @@ The LoopCandidates class
    :param offset: Loop starts at index *offset* in *prof*.
    :type offset:  :class:`int`

-    :return: Profile score for each candidate. The returned scores are in the
-             same order as this container.
-    :rtype:  :class:`list` of :class:`float`
-
    :raises: :exc:`~exceptions.RuntimeError` if :meth:`HasFragmentInfos` is
             False, if *structure_db* is incompatible with the stored fragment
             infos or if *prof* has less than *offset+len* elements (len =
             length of loops stored in here).


-  .. method:: CalculateStemRMSDs(n_stem, c_stem)
+  .. method:: CalculateStemRMSDs(score_container, n_stem, c_stem)

    Calculates RMSD between the given stems and the first and last residue of
    the loop candidates. This first superposes the first loop residue with
@@ -219,19 +258,31 @@ The LoopCandidates class
    Note that this score is only useful before calling :meth:`ApplyCCD` or
    :meth:`ApplyKIC`.

+    The computed scores are in the same order as the candidates in here.
+
+    :param score_container: Add scores to this score container using the default
+                            key name defined in :class:`ScoringWeights`
+    :type score_container:  :class:`ScoreContainer`
    :param n_stem: The residue at the N-terminal end of the loop.
    :type n_stem:  :class:`ost.mol.ResidueHandle`
    :param c_stem: The residue at the C-terminal end of the loop.
    :type c_stem:  :class:`ost.mol.ResidueHandle`

-    :return: Stem RMSD for each candidate. The returned scores are in the same
-             order as this container.
-    :rtype:  :class:`list` of :class:`float`
-
    :raises: :exc:`~exceptions.RuntimeError` if stems do no contain N, CA and C
             atoms.


+  .. method:: CalculateSequenceProfileScores(structure_db, prof, offset=0)
+              CalculateStructureProfileScores(structure_db, prof, offset=0)
+              CalculateStemRMSDs(n_stem, c_stem)
+
+    Same as the *score_container* variant above, but here we directly return the
+    score vector instead of storing it in a container.
+
+    :return: Score for each candidate (same order as candidates in here).
+    :rtype:  :class:`list` of :class:`float`
+
+
  .. method:: Add(bb_list)

    :param bb_list: The loop candidate to be added.
@@ -335,3 +386,158 @@ The LoopCandidates class
    :returns:           Largest possible cluster with all members having a
                        CA RMSD below **max_dist** to cluster centroid.
    :rtype:             :class:`LoopCandidates`
+
+
+Keeping track of loop candidate scores
+--------------------------------------------------------------------------------
+
+Two helper classes are used to keep track of and combine different scores
+computed on loop candidates.
+
+.. class:: ScoreContainer
+
+  Container to keep vectors of scores (one for each loop candidate) for each
+  scorer (one vector for each single scorer). Each score vector is guaranteed
+  to have the same number of values.
+
+  .. method:: IsEmpty()
+
+    :return: True, if no loop candidates have been scored with any scorer yet.
+    :rtype:  :class:`bool`
+
+  .. method:: Contains(key)
+
+    :return: True, if a score vector for this key was already added.
+    :rtype:  :class:`bool`
+    :param key: Key for desired scorer.
+    :type key:  :class:`str`
+
+  .. method:: Get(key)
+
+    :return: Score vector for the given *key*.
+    :rtype:  :class:`list` of :meth:`GetNumCandidates` :class:`float`
+    :param key: Key for desired score vector.
+    :type key:  :class:`str`
+    :raises: :exc:`~exceptions.RuntimeError` if there are no scores for that
+             *key*.
+
+  .. method:: Set(key, scores)
+
+    :param key: Set scores for that *key*.
+    :type key:  :class:`str`
+    :param scores: Score vector to set.
+    :type scores:  :class:`list` of :class:`float`
+    :raises: :exc:`~exceptions.RuntimeError` if this container contains other
+             score vectors with a different number of entries.
+
+  .. method:: GetNumCandidates()
+
+    :return: Number of loop candidates that are scored here. This is the length
+             of each score vector in this container.
+    :rtype:  :class:`int`
+
+  .. method:: LinearCombine(linear_weights)
+
+    :return: Weighted, linear combination of scores.
+    :rtype:  :class:`list` of :meth:`GetNumCandidates` :class:`float`
+
+    :param linear_weights: Weights for each scorer.
+    :type linear_weights:  :class:`dict` (keys: :class:`str`,
+                           values: :class:`float`)
+
+    :raises: :exc:`~exceptions.RuntimeError` if *linear_weights* has a key for
+             which no scores were added.
+
+  .. method:: Copy()
+
+    :return: Full copy of this container.
+    :rtype:  :class:`ScoreContainer`
+
+  .. method:: Extract(indices)
+
+    :return: Container with scores for a subset of loop candidates.
+    :rtype:  :class:`ScoreContainer`
+
+    :param indices: List of loop candidate indices to pick
+                    (in [0, :meth:`GetNumCandidates`-1])
+    :type indices:  :class:`list` of :class:`int`
+
+    :raises: :exc:`~exceptions.RuntimeError` if any index is out of bounds.
+
+  .. method:: Extend(other)
+
+    Extend each score vector with the score vector of *other* (must have
+    matching keys).
+
+    :param other: Score container to be added to this one.
+    :type other:  :class:`ScoreContainer`
+
+
+.. class:: ScoringWeights
+
+  Globally accessible set of weights to be used in scoring. This also defines
+  a consistent naming of keys used for backbone and all atom scorers as set up
+  by :func:`SetupDefaultBackboneScoring` and :func:`SetupDefaultAllAtomScoring`.
+
+  If you choose to modify the weights, please set consistently named keys in
+  here and use consistently named scorers and scoring computations!
+
+  .. staticmethod:: GetWeights(with_db=False, with_aa=False)
+
+    :return: Named weights to be used when scoring loop candidates. The default
+             weights were optimized to give the best performance when choosing
+             the loop candidate with the lowest combined score. Each set of
+             weights includes (different) backbone scoring weights.
+    :rtype:  :class:`dict` (keys: :class:`str`, values: :class:`float`)
+
+    :param with_db: True to choose a set of weights including DB specific scores
+                    (stem RMSD and profile scores)
+    :type with_db:  :class:`bool`
+    :param with_aa: True to choose a set of weights including all atom scores
+    :type with_aa:  :class:`bool`
+
+  .. staticmethod:: SetWeights(with_db, with_aa, weights)
+
+    Overwrite a set of weights as returned by :meth:`GetWeights`.
+
+  .. staticmethod:: GetStemRMSDsKey()
+                    GetSequenceProfileScoresKey()
+                    GetStructureProfileScoresKey()
+
+    :return: Default key for stem RMSD / sequence profile / structure profile
+             scores.
+    :rtype:  :class:`str`
+
+  .. staticmethod:: SetStemRMSDsKey(key)
+                    SetSequenceProfileScoresKey(key)
+                    SetStructureProfileScoresKey(key)
+
+    :param key: New default key for stem RMSD / sequence profile / structure
+                profile scores.
+    :type key:  :class:`str`
+
+  .. staticmethod:: GetBackboneScoringKeys()
+                    GetAllAtomScoringKeys()
+
+    :return: List of backbone / all-atom scorers to be computed for any set of
+             weights defined in here.
+    :rtype:  :class:`list` of :class:`str`
+
+  .. staticmethod:: SetBackboneScoringKeys(keys)
+                    SetAllAtomScoringKeys(keys)
+
+    :param keys: New list of backbone / all-atom scorers to be computed for any
+                 set of weights defined in here.
+    :type keys:  :class:`list` of :class:`str`
+
+
+Example: loop scoring in modelling
+--------------------------------------------------------------------------------
+
+In the example below, we show how we find and choose a loop candidate to close a
+gap for a model. This shows the combined usage of :class:`ModellingHandle` to
+keep a consistent modelling environment, :class:`LoopCandidates` with its
+scoring routines, :class:`ScoreContainer` for keeping track of scores and
+:class:`ScoringWeights` to combine scores:
+
+.. literalinclude:: ../../../tests/doc/scripts/modelling_loop_scoring.py
--- a/modelling/pymod/export_loop_candidate.cc
+++ b/modelling/pymod/export_loop_candidate.cc
@@ -267,16 +267,16 @@ void export_loop_candidate() {
          arg("pivot_three")))
    .def("CalculateBackboneScores", &WrapCalculateBackboneScores,
         (arg("score_container"), arg("scorer"),
-          arg("start_resnum"), arg("chain_index")=0))
+          arg("start_resnum"), arg("chain_idx")=0))
    .def("CalculateBackboneScores", &WrapCalculateBackboneScoresK,
         (arg("score_container"), arg("scorer"), arg("keys"),
-          arg("start_resnum"), arg("chain_index")=0))
+          arg("start_resnum"), arg("chain_idx")=0))
    .def("CalculateAllAtomScores", &WrapCalculateAllAtomScores,
         (arg("score_container"), arg("mhandle"),
-          arg("start_resnum"), arg("chain_index")=0))
+          arg("start_resnum"), arg("chain_idx")=0))
    .def("CalculateAllAtomScores", &WrapCalculateAllAtomScoresK,
         (arg("score_container"), arg("mhandle"), arg("keys"),
-          arg("start_resnum"), arg("chain_index")=0))
+          arg("start_resnum"), arg("chain_idx")=0))
    .def("CalculateSequenceProfileScores", &WrapCalcSequenceProfScores,
         (arg("structure_db"), arg("prof"), arg("offset")=0))
    .def("CalculateSequenceProfileScores", &WrapCalcSequenceProfScoresSC,

--- a/scoring/doc/backbone_scorers.rst
+++ b/scoring/doc/backbone_scorers.rst
@@ -750,3 +750,6 @@ PairwiseScorer class
  This scorer assumes that the attached environment has pairwise functions
  defined (see :meth:`BackboneScoreEnv.ApplyPairwiseFunction`) as soon as a
  score is to be calculated.
+
+  Note that for this scorer a higher "score" is better! So take care when
+  combining this with other scores, where it is commonly the other way around.