Skip to content
Snippets Groups Projects
Commit 6b899e02 authored by Gerardo Tauriello's avatar Gerardo Tauriello
Browse files

SCHWED-1612: Documented new scoring functionalities for loop candidates.

parent 5f22e431
No related branches found
No related tags found
No related merge requests found
......@@ -46,6 +46,7 @@ set (DOC_TEST_SCRIPTS
scripts/modelling_model_termini.py
scripts/modelling_monte_carlo.py
scripts/modelling_loop_candidates.py
scripts/modelling_loop_scoring.py
scripts/sidechain_reconstruct.py
scripts/sidechain_reconstructor.py
......
from ost import io, seq
from promod3 import modelling, loop
# Example: pick the best loop candidate for a gap by combining several scores.

# Build a raw model from the target/template alignment; the stretch of six
# residues missing from the template sequence is what has to be modelled.
template = io.LoadPDB('data/1crn_cut.pdb')
target_seq = 'TTCCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGDYAN'
template_seq = 'TTCCPSIVARSNFNVCRLPGTPEA------GCIIIPGATCPGDYAN'
target_handle = seq.CreateSequence('trg', target_seq)
template_handle = seq.CreateSequence('tpl', template_seq)
alignment = seq.CreateAlignment(target_handle, template_handle)
alignment.AttachView(1, template.CreateFullView())
mhandle = modelling.BuildRawModel(alignment)
print("Number of gaps in raw model: %d" % len(mhandle.gaps))

# Attach the default backbone and all-atom scorers to the modelling handle.
modelling.SetupDefaultBackboneScoring(mhandle)
modelling.SetupDefaultAllAtomScoring(mhandle)

# Load the databases needed to find and close loop candidates.
frag_db = loop.LoadFragDB()
structure_db = loop.LoadStructureDB()
torsion_sampler = loop.LoadTorsionSamplerCoil()

# Work on a copy of the first gap and remember where the loop starts.
gap = mhandle.gaps[0].Copy()
print("Gap to close: %s" % str(gap))
n_stem = gap.before
c_stem = gap.after
start_resnum = n_stem.GetNumber().GetNum()
# residue numbers start at 1, profile indices at 0
start_idx = start_resnum - 1

# Query the fragment database for loop candidates bridging the two stems.
candidates = modelling.LoopCandidates.FillFromDatabase(
    n_stem, c_stem, gap.full_seq, frag_db, structure_db)
print("Number of loop candidates: %d" % len(candidates))

# Every score computed below is collected in this single container.
all_scores = modelling.ScoreContainer()

# The keys identifying the individual scores are defined globally.
scoring_weights = modelling.ScoringWeights
stem_rmsd_key = scoring_weights.GetStemRMSDsKey()
print("Stem RMSD key = '%s'" % stem_rmsd_key)
profile_keys = (scoring_weights.GetSequenceProfileScoresKey(),
                scoring_weights.GetStructureProfileScoresKey())
print("Profile keys = ['%s', '%s']" % profile_keys)
backbone_keys = scoring_weights.GetBackboneScoringKeys()
print("Backbone scoring keys = %s" % str(backbone_keys))
all_atom_keys = scoring_weights.GetAllAtomScoringKeys()
print("All atom scoring keys = %s" % str(all_atom_keys))

# Stem RMSDs tell how well each candidate fits the stems; they are only
# meaningful before the candidates get closed, so compute them first.
candidates.CalculateStemRMSDs(all_scores, n_stem, c_stem)

# Close the candidates with CCD and keep the indices of the survivors.
orig_indices = candidates.ApplyCCD(n_stem, c_stem, torsion_sampler)
print("Number of closed loop candidates: %d" % len(candidates))

# Restrict the scores computed so far to the candidates that are left.
all_scores = all_scores.Extract(orig_indices)

# Profile scores require a sequence profile of the target.
prof = io.LoadSequenceProfile("data/1CRNA.hhm")
candidates.CalculateSequenceProfileScores(
    all_scores, structure_db, prof, start_idx)
candidates.CalculateStructureProfileScores(
    all_scores, structure_db, prof, start_idx)

# Backbone scores use the scorer attached to the modelling handle.
backbone_scorer = mhandle.backbone_scorer
candidates.CalculateBackboneScores(all_scores, backbone_scorer, start_resnum)

# All-atom scores are computed through the modelling handle itself.
candidates.CalculateAllAtomScores(all_scores, mhandle, start_resnum)

# Combine everything with the default weights for DB and all-atom scores.
weights = modelling.ScoringWeights.GetWeights(with_db=True, with_aa=True)
scores = all_scores.LinearCombine(weights)

# Rank the candidates; the lowest combined "score" is the best one.
ranking = sorted((score, idx) for idx, score in enumerate(scores))
print("Ranked candidates: score, index")
for score, idx in ranking:
    print("%g, %d" % (score, idx))

# Insert the top-ranked candidate into the model (this also updates the
# scorers and clears the gap) and write the result to disk.
best_idx = ranking[0][1]
best_candidate = candidates[best_idx]
modelling.InsertLoopClearGaps(mhandle, best_candidate, gap)
print("Number of gaps in closed model: %d" % len(mhandle.gaps))
io.SavePDB(mhandle.model, "model.pdb")
......@@ -359,6 +359,14 @@ class DocTests(unittest.TestCase):
# clean up
os.remove('modified_crambin.pdb')
def testModellingLoopScoring(self):
    """Run the loop scoring example and check that it wrote a loadable model."""
    # file the example script saves its final model to
    model_path = 'model.pdb'
    # execute the script (the trailing 0 is presumably the expected exit
    # code - confirm against checkPMRun)
    self.checkPMRun('modelling_loop_scoring.py', [], 0)
    # loading fails loudly if the output is missing or unreadable
    io.LoadPDB(model_path)
    # remove the generated file so later tests start clean
    os.remove(model_path)
################################################################
def testSidechainReconstruct(self):
......
......@@ -176,9 +176,47 @@ The LoopCandidates class
useful to keep track of scores and other data extracted before.
:rtype: :class:`list` of :class:`int`
.. method:: CalculateSequenceProfileScores(structure_db, prof, offset=0)
CalculateStructureProfileScores(structure_db, prof, offset=0)
.. method:: CalculateBackboneScores(score_container, scorer, \
start_resnum, chain_idx=0)
CalculateBackboneScores(score_container, scorer, keys, \
start_resnum, chain_idx=0)
CalculateAllAtomScores(score_container, mhandle, \
start_resnum, chain_idx=0)
CalculateAllAtomScores(score_container, mhandle, keys, \
start_resnum, chain_idx=0)
Calculate backbone / all-atom scores for each loop candidate.
Note that (unless otherwise noted) a lower "score" is better!
The computed scores are in the same order as the candidates in here.
:param score_container: Add scores to this score container using the given
key names (or the ones from :class:`ScoringWeights`)
:type score_container: :class:`ScoreContainer`
:param scorer: Backbone scoring object with set environment for the
particular loop modelling problem.
:type scorer: :class:`~promod3.scoring.BackboneOverallScorer`
:param mhandle: Modelling handle set up for all atom scoring (see
:func:`SetupDefaultAllAtomScoring`).
:type mhandle: :class:`ModellingHandle`
:param keys: Keys of the desired scorers. If not given, we use the set of
keys given by :meth:`ScoringWeights.GetBackboneScoringKeys` /
:meth:`ScoringWeights.GetAllAtomScoringKeys`.
:type keys: :class:`list` of :class:`str`
:param start_resnum: Res. number defining the position in the SEQRES.
:type start_resnum: :class:`int`
:param chain_idx: Index of chain the loops belong to.
:type chain_idx: :class:`int`
:raises: :exc:`~exceptions.RuntimeError` if :func:`IsAllAtomScoringSetUp`
is False, if *keys* has a key for which no scorer exists or if
anything is raised when calculating the scores.
.. method:: CalculateSequenceProfileScores(score_container, structure_db, \
prof, offset=0)
CalculateStructureProfileScores(score_container, structure_db, \
prof, offset=0)
Calculates a score comparing the given profile *prof* starting at *offset*
with the sequence / structure profile of each candidate as extracted from
......@@ -192,6 +230,11 @@ The LoopCandidates class
given *structure_db* (e.g. :meth:`FillFromDatabase` must have been called
with this DB).
The computed scores are in the same order as the candidates in here.
:param score_container: Add scores to this score container using the default
key name defined in :class:`ScoringWeights`
:type score_container: :class:`ScoreContainer`
:param structure_db: Structural database used in :meth:`FillFromDatabase`
:type structure_db: :class:`~promod3.loop.StructureDB`
:param prof: Profile information for target.
......@@ -199,17 +242,13 @@ The LoopCandidates class
:param offset: Loop starts at index *offset* in *prof*.
:type offset: :class:`int`
:return: Profile score for each candidate. The returned scores are in the
same order as this container.
:rtype: :class:`list` of :class:`float`
:raises: :exc:`~exceptions.RuntimeError` if :meth:`HasFragmentInfos` is
False, if *structure_db* is incompatible with the stored fragment
infos or if *prof* has less than *offset+len* elements (len =
length of loops stored in here).
.. method:: CalculateStemRMSDs(n_stem, c_stem)
.. method:: CalculateStemRMSDs(score_container, n_stem, c_stem)
Calculates RMSD between the given stems and the first and last residue of
the loop candidates. This first superposes the first loop residue with
......@@ -219,19 +258,31 @@ The LoopCandidates class
Note that this score is only useful before calling :meth:`ApplyCCD` or
:meth:`ApplyKIC`.
The computed scores are in the same order as the candidates in here.
:param score_container: Add scores to this score container using the default
key name defined in :class:`ScoringWeights`
:type score_container: :class:`ScoreContainer`
:param n_stem: The residue at the N-terminal end of the loop.
:type n_stem: :class:`ost.mol.ResidueHandle`
:param c_stem: The residue at the C-terminal end of the loop.
:type c_stem: :class:`ost.mol.ResidueHandle`
:return: Stem RMSD for each candidate. The returned scores are in the same
order as this container.
:rtype: :class:`list` of :class:`float`
:raises: :exc:`~exceptions.RuntimeError` if stems do not contain N, CA and C
atoms.
.. method:: CalculateSequenceProfileScores(structure_db, prof, offset=0)
CalculateStructureProfileScores(structure_db, prof, offset=0)
CalculateStemRMSDs(n_stem, c_stem)
Same as the *score_container* variant above, but here we directly return the
score vector instead of storing it in a container.
:return: Score for each candidate (same order as candidates in here).
:rtype: :class:`list` of :class:`float`
.. method:: Add(bb_list)
:param bb_list: The loop candidate to be added.
......@@ -335,3 +386,158 @@ The LoopCandidates class
:returns: Largest possible cluster with all members having a
CA RMSD below **max_dist** to cluster centroid.
:rtype: :class:`LoopCandidates`
Keeping track of loop candidate scores
--------------------------------------------------------------------------------
Two helper classes are used to keep track of and combine different scores
computed on loop candidates.
.. class:: ScoreContainer
Container to keep vectors of scores (one for each loop candidate) for each
scorer (one vector for each single scorer). Each score vector is guaranteed
to have the same number of values.
.. method:: IsEmpty()
:return: True, if no loop candidates have been scored with any scorer yet.
:rtype: :class:`bool`
.. method:: Contains(key)
:return: True, if a score vector for this key was already added.
:rtype: :class:`bool`
:param key: Key for desired scorer.
:type key: :class:`str`
.. method:: Get(key)
:return: Score vector for the given *key*.
:rtype: :class:`list` of :meth:`GetNumCandidates` :class:`float`
:param key: Key for desired score vector.
:type key: :class:`str`
:raises: :exc:`~exceptions.RuntimeError` if there are no scores for that
*key*.
.. method:: Set(key, scores)
:param key: Set scores for that *key*.
:type key: :class:`str`
:param scores: Score vector to set.
:type scores: :class:`list` of :class:`float`
:raises: :exc:`~exceptions.RuntimeError` if this container contains other
score vectors with a different number of entries.
.. method:: GetNumCandidates()
:return: Number of loop candidates that are scored here. This is the length
of each score vector in this container.
:rtype: :class:`int`
.. method:: LinearCombine(linear_weights)
:return: Weighted, linear combination of scores.
:rtype: :class:`list` of :meth:`GetNumCandidates` :class:`float`
:param linear_weights: Weights for each scorer.
:type linear_weights: :class:`dict` (keys: :class:`str`,
values: :class:`float`)
:raises: :exc:`~exceptions.RuntimeError` if *linear_weights* has a key for
which no scores were added.
.. method:: Copy()
:return: Full copy of this container.
:rtype: :class:`ScoreContainer`
.. method:: Extract(indices)
:return: Container with scores for a subset of loop candidates.
:rtype: :class:`ScoreContainer`
:param indices: List of loop candidate indices to pick
(in [0, :meth:`GetNumCandidates`-1])
:type indices: :class:`list` of :class:`int`
:raises: :exc:`~exceptions.RuntimeError` if any index is out of bounds.
.. method:: Extend(other)
Extend each score vector with the score vector of *other* (must have
matching keys).
:param other: Score container to be added to this one.
:type other: :class:`ScoreContainer`
.. class:: ScoringWeights
Globally accessible set of weights to be used in scoring. This also defines
a consistent naming of keys used for backbone and all atom scorers as set up
by :func:`SetupDefaultBackboneScoring` and :func:`SetupDefaultAllAtomScoring`.
If you choose to modify the weights, please ensure to set consistently named
keys in here and to use consistently named scorers and scoring computations!
.. staticmethod:: GetWeights(with_db=False, with_aa=False)
:return: Named weights to be used when scoring loop candidates. The default
weights were optimized to give the best performance when choosing
the loop candidate with the lowest combined score. Each set of
weights includes (different) backbone scoring weights.
:rtype: :class:`dict` (keys: :class:`str`, values: :class:`float`)
:param with_db: True to choose a set of weights including DB specific scores
(stem RMSD and profile scores)
:type with_db: :class:`bool`
:param with_aa: True to choose a set of weights including all atom scores
:type with_aa: :class:`bool`
.. staticmethod:: SetWeights(with_db, with_aa, weights)
Overwrite a set of weights as returned by :meth:`GetWeights`.
.. staticmethod:: GetStemRMSDsKey()
GetSequenceProfileScoresKey()
GetStructureProfileScoresKey()
:return: Default key for stem RMSD / sequence profile / structure profile
scores.
:rtype: :class:`str`
.. staticmethod:: SetStemRMSDsKey(key)
SetSequenceProfileScoresKey(key)
SetStructureProfileScoresKey(key)
:param key: New default key for stem RMSD / sequence profile / structure
profile scores.
:type key: :class:`str`
.. staticmethod:: GetBackboneScoringKeys()
GetAllAtomScoringKeys()
:return: List of backbone / all-atom scorers to be computed for any set of
weights defined in here.
:rtype: :class:`list` of :class:`str`
.. staticmethod:: SetBackboneScoringKeys(keys)
SetAllAtomScoringKeys(keys)
:param keys: New list of backbone / all-atom scorers to be computed for any
set of weights defined in here.
:type keys: :class:`list` of :class:`str`
Example: loop scoring in modelling
--------------------------------------------------------------------------------
In the example below, we show how we find and choose a loop candidate to close a
gap for a model. This shows the combined usage of :class:`ModellingHandle` to
keep a consistent modelling environment, :class:`LoopCandidates` with its
scoring routines, :class:`ScoreContainer` for keeping track of scores and
:class:`ScoringWeights` to combine scores:
.. literalinclude:: ../../../tests/doc/scripts/modelling_loop_scoring.py
......@@ -267,16 +267,16 @@ void export_loop_candidate() {
arg("pivot_three")))
.def("CalculateBackboneScores", &WrapCalculateBackboneScores,
(arg("score_container"), arg("scorer"),
arg("start_resnum"), arg("chain_index")=0))
arg("start_resnum"), arg("chain_idx")=0))
.def("CalculateBackboneScores", &WrapCalculateBackboneScoresK,
(arg("score_container"), arg("scorer"), arg("keys"),
arg("start_resnum"), arg("chain_index")=0))
arg("start_resnum"), arg("chain_idx")=0))
.def("CalculateAllAtomScores", &WrapCalculateAllAtomScores,
(arg("score_container"), arg("mhandle"),
arg("start_resnum"), arg("chain_index")=0))
arg("start_resnum"), arg("chain_idx")=0))
.def("CalculateAllAtomScores", &WrapCalculateAllAtomScoresK,
(arg("score_container"), arg("mhandle"), arg("keys"),
arg("start_resnum"), arg("chain_index")=0))
arg("start_resnum"), arg("chain_idx")=0))
.def("CalculateSequenceProfileScores", &WrapCalcSequenceProfScores,
(arg("structure_db"), arg("prof"), arg("offset")=0))
.def("CalculateSequenceProfileScores", &WrapCalcSequenceProfScoresSC,
......
......@@ -750,3 +750,6 @@ PairwiseScorer class
This scorer assumes that the attached environment has pairwise functions
defined (see :meth:`BackboneScoreEnv.ApplyPairwiseFunction`) as soon as a
score is to be calculated.
Note that for this scorer a higher "score" is better! So take care when
combining this with other scores, where it is commonly the other way around.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment