From a8bba72aeeab3ced5762364386455f95e6a30744 Mon Sep 17 00:00:00 2001 From: Gerardo Tauriello <gerardo.tauriello@unibas.ch> Date: Wed, 17 Aug 2016 14:04:45 +0200 Subject: [PATCH] Doc update: redesigned modelling module with new loop modelling code. --- loop/doc/CMakeLists.txt | 3 - loop/doc/backbone.rst | 2 +- loop/doc/index.rst | 22 +- loop/doc/load_loop_objects.rst | 2 +- loop/doc/structure_db.rst | 104 +++- loop/doc/torsion_sampler.rst | 2 +- modelling/doc/CMakeLists.txt | 8 +- modelling/doc/gap_handling.rst | 293 ++++++++++ modelling/doc/index.rst | 511 +----------------- .../doc/loop_candidates.rst | 210 ++----- {loop => modelling}/doc/loop_closing.rst | 166 +++--- modelling/doc/model_checking.rst | 29 + {loop => modelling}/doc/monte_carlo.rst | 156 +++--- modelling/doc/pipeline.rst | 226 ++++++++ modelling/pymod/_molprobity.py | 2 +- modelling/pymod/_pipeline.py | 15 +- scoring/doc/index.rst | 4 +- 17 files changed, 906 insertions(+), 849 deletions(-) create mode 100644 modelling/doc/gap_handling.rst rename loop/doc/helper_classes.rst => modelling/doc/loop_candidates.rst (66%) rename {loop => modelling}/doc/loop_closing.rst (62%) create mode 100644 modelling/doc/model_checking.rst rename {loop => modelling}/doc/monte_carlo.rst (75%) create mode 100644 modelling/doc/pipeline.rst diff --git a/loop/doc/CMakeLists.txt b/loop/doc/CMakeLists.txt index 2d997e35..47136ac1 100644 --- a/loop/doc/CMakeLists.txt +++ b/loop/doc/CMakeLists.txt @@ -1,12 +1,9 @@ set(LOOP_RST index.rst torsion_sampler.rst - loop_closing.rst backbone.rst - monte_carlo.rst structure_db.rst load_loop_objects.rst - helper_classes.rst ) add_doc_source(NAME loop RST ${LOOP_RST}) diff --git a/loop/doc/backbone.rst b/loop/doc/backbone.rst index 6be5e8ed..3b1f25e4 100644 --- a/loop/doc/backbone.rst +++ b/loop/doc/backbone.rst @@ -1,4 +1,4 @@ -Backbone +Representing Loops ================================================================================ .. 
currentmodule:: promod3.loop diff --git a/loop/doc/index.rst b/loop/doc/index.rst index e05c3774..0039b60c 100644 --- a/loop/doc/index.rst +++ b/loop/doc/index.rst @@ -1,15 +1,14 @@ -:mod:`~promod3.loop` - Loop Modelling +:mod:`~promod3.loop` - Loop Handling ================================================================================ .. module:: promod3.loop - :synopsis: Loop Modelling + :synopsis: Loop Handling .. currentmodule:: promod3.loop -Tools and algorithms for loop modelling. -This module provides ways for representation and manipulation of peptides and -finally connect them to larger structures. The following example should give -you a starting point to get an idea of what can be done. +Tools and algorithms for loop handling. This module provides ways for +representation of peptides and to obtain fragments to potentially use as +loops. The following example should give you an idea of what can be done: .. literalinclude:: ../../../tests/doc/scripts/loop_main.py @@ -18,10 +17,7 @@ Contents: .. toctree:: :maxdepth: 2 - Representing Loops <backbone> - Have a closer look at backbone dihedral angles <torsion_sampler> - A dump for structural data <structure_db> - Closing Loops - adapt them to the environment <loop_closing> - Generating Loops DeNovo <monte_carlo> - Loading Precomputed Loop Objects <load_loop_objects> - Helper Classes <helper_classes> + backbone + torsion_sampler + structure_db + load_loop_objects diff --git a/loop/doc/load_loop_objects.rst b/loop/doc/load_loop_objects.rst index 383c4f0f..15045223 100644 --- a/loop/doc/load_loop_objects.rst +++ b/loop/doc/load_loop_objects.rst @@ -1,4 +1,4 @@ -Load Loop Objects +Loading Precomputed Objects ================================================================================ .. 
currentmodule:: promod3.loop diff --git a/loop/doc/structure_db.rst b/loop/doc/structure_db.rst index 1a4909a9..280b4998 100644 --- a/loop/doc/structure_db.rst +++ b/loop/doc/structure_db.rst @@ -1,4 +1,4 @@ -Structural Database +Structural Data ================================================================================ .. currentmodule:: promod3.loop @@ -76,7 +76,7 @@ Defining Chains and Fragments Length of the fragment (:class:`int`) -The Database +The Structure Database -------------------------------------------------------------------------------- The following code example demonstrates how to create a structural database @@ -747,6 +747,106 @@ arbitrary linear combination of following components: .. autoclass:: FraggerHandle :members: + +The PsipredPrediction class +-------------------------------------------------------------------------------- + +.. class:: PsipredPrediction + + A container for the secondary structure prediction by Psipred. + + .. method:: PsipredPrediction() + + Constructs empty container + + .. method:: PsipredPrediction(prediction, confidence) + + Constructs container with given content + + :param prediction: Secondary structure prediction as element in ['H','E','C'] + :param confidence: Confidence of prediction as element in [0,9] + + :type prediction: :class:`list` + :type confidence: :class:`list` + + :raises: :exc:`~exceptions.RuntimeError` if size of **prediction** and + **confidence** are inconsistent or if they contain an invalid + element + + .. method:: FromHHM(filename) + + Static function to Load a :class:`PsipredPrediction` object from hhm file, + as they are provided by the hhsearch suite + + :param filename: Name of file + :type filename: :class:`str` + + .. method:: FromHoriz(filename) + + Static function to Load a :class:`PsipredPrediction` object from horiz file, + as they are produced by the psipred executable + + :param filename: Name of file + :type filename: :class:`str` + + .. 
method:: Add(prediction, confidence) + + Adds and appends a single residue psipred prediction at the end + + :param prediction: Prediction, must be one in ['H','E','C'] + :param confidence: Confidence of prediction, must be in [0,9] + + :type prediction: :class:`str` + :type confidence: :class:`int` + + :raises: :exc:`~exceptions.RuntimeError` if input contains invalid elements + + .. method:: Extract(from, to) + + Extracts content and returns a sub-:class:`PsipredPrediction` with range **from** + to **to**, not including **to** itself + + :param from: Idx to start + :param to: Idx to end + + :type from: :class:`int` + :type to: :class:`int` + + :returns: :class:`PsipredPrediction` with the specified range + + :raises: :exc:`~exceptions.RuntimeError` if **from** or **to** are invalid + + .. method:: GetPrediction(idx) + + :param idx: Index to get prediction from + :type idx: :class:`int` + :returns: Psipred prediction at pos **idx** + :raises: :exc:`~exceptions.RuntimeError` if **idx** is invalid + + .. method:: GetConfidence(idx) + + :param idx: Index to get confidence from + :type idx: :class:`int` + :returns: Psipred confidence at pos **idx** + :raises: :exc:`~exceptions.RuntimeError` if **idx** is invalid + + .. method:: GetPredictions() + + Get all the predictions in the container + + :returns: :class:`list` containing all the predictions in the container + + .. method:: GetConfidences() + + Get all the confidences in the container + + :returns: :class:`list` containing all the confidences in the container + + .. method:: __len__() + + :returns: Number of elements in container + + .. [soding2005] Söding J (2005). Protein homology detection by HMM-HMM comparison. Bioinformatics 21 (7): 951–960. .. [sanner1996] Sanner M, Olson AJ, Spehner JC (1996). Reduced Surface: an Efficient Way to Compute Molecular Surfaces. Biopolymers 38 (3): 305-320. .. [chakravarty1999] Chakravarty S, Varadarajan R (1999). 
Residue depth: a novel parameter for the analysis of protein structure and stability. Structure 7 (7): 723–732. diff --git a/loop/doc/torsion_sampler.rst b/loop/doc/torsion_sampler.rst index bb88f9b1..0962c356 100644 --- a/loop/doc/torsion_sampler.rst +++ b/loop/doc/torsion_sampler.rst @@ -1,4 +1,4 @@ -Torsion Sampler +Sampling Dihedral Angles ================================================================================ .. currentmodule:: promod3.loop diff --git a/modelling/doc/CMakeLists.txt b/modelling/doc/CMakeLists.txt index 082558a1..d0ec23f2 100644 --- a/modelling/doc/CMakeLists.txt +++ b/modelling/doc/CMakeLists.txt @@ -1,5 +1,11 @@ set(MODELLING_RST -index.rst + index.rst + pipeline.rst + model_checking.rst + gap_handling.rst + loop_candidates.rst + loop_closing.rst + monte_carlo.rst ) add_doc_source(NAME modelling RST ${MODELLING_RST}) diff --git a/modelling/doc/gap_handling.rst b/modelling/doc/gap_handling.rst new file mode 100644 index 00000000..b2dd897c --- /dev/null +++ b/modelling/doc/gap_handling.rst @@ -0,0 +1,293 @@ +Handling Gaps +================================================================================ + +.. currentmodule:: promod3.modelling + +This chapter describes the gap classes and functionality attached to them. These +classes / functions are used within the modelling pipeline. + +Gap classes +-------------------------------------------------------------------------------- + +.. class:: StructuralGap(before, after, seq) + + Describes a structural gap, i.e. a loop to be modeled. The gap may either be + terminal or between two defined regions. The gap stores information about the + last residue before and the first residue after the gap as well as the + sequence of the gap. Gaps at the N- and C-termini can be defined by passing + invalid residue handles to `before` or `after`. 
+ + :param before: Fills :attr:`before` + :type before: :class:`ost.mol.ResidueHandle` + :param after: Fills :attr:`after` + :type after: :class:`ost.mol.ResidueHandle` + :param seq: Fills :attr:`seq` + :type seq: :class:`str` + + :raises: A :exc:`RuntimeError` if both residues are invalid or when both + are valid and: + + - residues are from different chains (if both valid) + - `before` is located after `after` + - `seq` has a length which is inconsistent with the gap + + .. method:: GetChainIndex() + + :return: Index of chain, the gap is belonging to + :rtype: :class:`int` + + .. method:: GetChainName() + + :return: Name of chain, the gap is belonging to + :rtype: :class:`str` + + .. method:: GetChain() + + :return: Chain, the gap is belonging to + :rtype: :class:`ost.mol.ChainHandle` + + .. method:: IsNTerminal() + + :return: True, iff gap is N-terminal (i.e. :attr:`before` is invalid + and :attr:`after` is valid) + :rtype: :class:`bool` + + .. method:: IsCTerminal() + + :return: True, iff gap is C-terminal (i.e. :attr:`before` is valid + and :attr:`after` is invalid) + :rtype: :class:`bool` + + .. method:: IsTerminal() + + :return: True, iff gap is N- or C-terminal + :rtype: :class:`bool` + + .. method:: ShiftCTerminal() + + Try to shift gap by one position towards C-terminal. Only possible if new + gap is not terminal and it doesn't try to shift the gap past another gap. + + :return: True, iff shift succeeded (gap is only updated in that case) + :rtype: :class:`bool` + + .. method:: ExtendAtNTerm() + + Try to extend gap at N-terminal end of gap. + Only possible if the gap is not at N-terminal and it doesn't try to + extend the gap past another gap. + + :return: True, iff extend succeeded (gap is only updated in that case) + :rtype: :class:`bool` + + .. method:: ExtendAtCTerm() + + Try to extend gap at C-terminal end of gap. + Only possible if the gap is not at C-terminal and it doesn't try to + extend the gap past another gap. 
+ + :return: True, iff extend succeeded (gap is only updated in that case) + :rtype: :class:`bool` + + .. method:: GetLength() + + :return: Length of the gap. + :rtype: :class:`int` + + .. method:: Copy() + + :return: Copy of the gap. + :rtype: :class:`StructuralGap` + + .. attribute:: length + + Alias for :meth:`GetLength()` (read-only, :class:`int`) + + .. attribute:: seq + + Sequence string for the gap (read-only, :class:`str`) + + .. attribute:: before + + Residue before the gap (read-only, :class:`ost.mol.ResidueHandle`) + + .. attribute:: after + + Residue after the gap (read-only, :class:`ost.mol.ResidueHandle`) + + .. attribute:: full_seq + + Full sequence, including stem residues (read-only) + +.. class:: StructuralGapList + + Represents a :class:`list` of :class:`StructuralGap`. + + +Gap Extender classes +-------------------------------------------------------------------------------- + +The extender classes work on a given :class:`StructuralGap` and provide an +Extend() function to propose new gaps for loop modelling. The function returns +False if no new extension is possible. + +.. class:: GapExtender(gap, seqres) + + The extender cycles through the following steps: + + .. code-block:: none + + - + -- + -- + --- + --- + --- + ---- + ---- + ---- + ---- + + :param gap: The gap which will be extended by :meth:`Extend`. + :param seqres: The full sequence of the chain, the gap is associated with. + :type gap: :class:`StructuralGap` + :type seqres: :class:`str` / :class:`ost.seq.SequenceHandle` + + :raises: An exception if a terminal gap is used to construct this. + + .. method:: Extend() + + Tries to extend *gap*. + + :return: False, if the *gap* cannot be extended any further. This happens + if it reaches a terminus or another insertion gap. + Otherwise, the *gap* passed to the constructor is changed. + The gaps are extended with ascending length and will always have + valid termini. + :rtype: :class:`bool` + +.. 
class:: FullGapExtender(gap, seqres, max_length=-1) + + Cycles as GapExtender, but continues even if another gap was encountered. + + :param gap: The gap which will be extended by :meth:`Extend`. + :param seqres: The full sequence of the chain, the gap is associated with. + :param max_length: - If -1, all possible non-terminal gaps are returned. + - If >= 0, this restricts the max. gap-length + (w/o termini) producable by :meth:`Extend`. + :type gap: :class:`StructuralGap` + :type seqres: :class:`str` / :class:`ost.seq.SequenceHandle` + :type max_length: :class:`int` + + :raises: An exception if a terminal gap is used to construct this. + + .. method:: Extend() + + Tries to extend *gap*. + + :return: False, if the *gap* cannot be extended without exceeding *max_length*. + Otherwise, the *gap* passed to the constructor is changed. + The gaps are extended with ascending length and will always have + valid termini. + :rtype: :class:`bool` + +.. class:: ScoringGapExtender(gap, extension_penalty, penalties, seqres,\ + max_length=-2) + + The extender scores possible gap extensions and returns them in order of + their score when :meth:`Extend` is called. + The score is penalized according to length and according to certain (well + conserved) regions in the structure as defined by *penalties*. + score = num_gap_extensions * `extension_penalty` + sum( `penalties` [i] ) + (i = resnum - 1 of residues in extension) + + :param gap: The gap which will be extended by :meth:`Extend`. + :type gap: :class:`StructuralGap` + :param extension_penalty: Penalty for length of gap. + :type extension_penalty: :class:`float` + :param penalties: Penalty for each residue added to gap. + :type penalties: :class:`list` of :class:`float` + :param seqres: The full sequence of the chain, the gap is associated with. + :type seqres: :class:`str` / :class:`ost.seq.SequenceHandle` + :param max_length: + - If -2, :class:`GapExtender` is used instead of :class:`FullGapExtender` + (i.e. 
it stops at gaps and termini). + - If -1, all possible non-terminal gaps are returned. + - If >= 0, this restricts the max. gap-length (w/o termini) + producible by :meth:`Extend`. + :type max_length: :class:`int` + + :raises: An exception if a terminal gap is used to construct this. + + .. method:: Extend() + + Tries to extend *gap*. + + :return: False, if the gap cannot be extended any further. + Otherwise, *gap* is changed and returned in ascending score. + The updated *gap* will always have valid termini. + :rtype: :class:`bool` + +.. class:: ShiftExtension(n_num, c_num) + + Implements the underlying extension scheme of the :class:`GapExtender`. + It is not associated with any structural data, it just spits out the + residue numbers according to the extension scheme described above. + + :param n_num: N residue number to start with + :param c_num: C residue number to start with + :type n_num: :class:`int` + :type c_num: :class:`int` + + .. method:: Extend() + + :returns: The next residue numbers for n_stem and c_stem + :rtype: :class:`tuple` + + +Gap Handling Functions +-------------------------------------------------------------------------------- + +.. function:: CountEnclosedGaps(mhandle, gap) + CountEnclosedInsertions(mhandle, gap) + + Counts all gaps from `mhandle` which are fully enclosed by given `gap`. + This is either all gaps or only insertions. + + :param mhandle: Modelling handle whose gaps are to be counted. + :type mhandle: :class:`ModellingHandle` + :param gap: Gap defining range in which gaps are to be counted. + :type gap: :class:`StructuralGap` + + :return: Number of gaps. + :rtype: :class:`int` + +.. function:: ClearGaps(mhandle, gap) + + Removes all gaps from `mhandle` which are fully enclosed by given `gap`. + + :param mhandle: Modelling handle on which to apply change. + :type mhandle: :class:`ModellingHandle` + :param gap: Gap defining range in which gaps are to be removed. 
+ :type gap: :class:`StructuralGap` + + :return: Index of next gap in mhandle.gaps after removal. + Returns -1 if last gap was removed or no gaps in *mhandle*. + :rtype: :class:`int` + + :raises: A :exc:`RuntimeError` if any gap in mhandle.gaps is only partially + enclosed by given gap. + +.. function:: MergeGaps(mhandle, index) + + Merges two gaps `mhandle.gaps[index]` and `mhandle.gaps[index+1]`. + The residues in between the gaps are removed from `mhandle.model` and added + to the new `mhandle.gaps[index]`. + + :param mhandle: Modelling handle on which to apply change. + :type mhandle: :class:`ModellingHandle` + :param index: Index of gap to merge with next one. + :type index: :class:`int` + + :raises: A :exc:`RuntimeError` if indices out of range or if trying to merge + gaps of different chains or an N-terminal gap with a C-terminal gap. \ No newline at end of file diff --git a/modelling/doc/index.rst b/modelling/doc/index.rst index 81565649..e20b9f4c 100644 --- a/modelling/doc/index.rst +++ b/modelling/doc/index.rst @@ -6,502 +6,25 @@ .. currentmodule:: promod3.modelling -High-level functionality for protein modelling. -Commonly, your input is a template structure and an alignment of the template to -the desired target sequence. -A protein homology modelling pipeline then has the following main steps: - -- Build a raw model from the template (see :func:`BuildRawModel` function) -- Perform loop modelling to close (or remove) all gaps (see functions - :func:`CloseSmallDeletions`, :func:`RemoveTerminalGaps`, - :func:`MergeGapsByDistance`, :func:`FillLoopsByDatabase`, - :func:`FillLoopsByMonteCarlo`, :func:`CloseLargeDeletions`) -- Build sidechains (see :func:`BuildSidechains` function) -- Minimize energy of final model using molecular mechanics - (see :func:`MinimizeModelEnergy` function) - -The last steps to go from a raw model to a final model can easily be executed -with the :func:`BuildFromRawModel` function. 
In its simplest form, one can run -a full protein homology modelling pipeline as follows: +High-level functionality for protein modelling. The goal is to model a given +target sequence (or list of sequences for oligomers) given some template data. +Commonly, the template does not cover the full target. This module offers +capabilities to extract useful template data for the target and to fill the +remaining structural data to create a full model of the target. In its simplest +form, you can use a target-template alignment and a template structure to create +a model fully automatically as follows: .. literalinclude:: ../../../tests/doc/scripts/modelling_all.py -If you want to run and tweak the internal steps, you can start with the -following code which is equivalent to the example above: - -.. _modelling_steps_example: - -.. literalinclude:: ../../../tests/doc/scripts/modelling_steps.py - -In the default pipeline above, we call :func:`FillLoopsByDatabase` multiple -times. First, we try to close "easy" gaps which require few extensions (we wish -to limit the damage we do on the template) and for which we have plenty of loop -candidates. If some gaps cannot be closed like this, we try less restrictive -options. This approach is helpful if neighboring gaps are close together and the -one closer to the C-terminus is easier to close. Several variants of the -pipeline were evaluated on 1752 target-template-pairs and this one worked best. - - -Modelling Pipeline --------------------------------------------------------------------------------- - -.. function:: BuildRawModel(aln, include_ligands=False, chain_names=\ - "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz",\ - spdbv_style=False) - - Builds a raw (pseudo) model from the alignment. Can either take a single - alignment handle or an alignment handle list. Every list item is treated as a - single chain in the final raw model. 
- - Each alignment handle must contain exactly two sequences and the second - sequence is considered the template sequence, which must have a - :class:`~ost.mol.EntityView` attached. - - This is a basic protein core modelling algorithm that copies backbone - coordinates based on the sequence alignment. For matching residues, the - side chain coordinates are also copied. Gaps are ignored. Hydrogen an - deuterium atoms are not copied into the model. - - The function tries to reuse as much as possible from the template. Modified - residues are treated as follows: - - - Selenium methionine residues are converted to methionine - - - Side chains which contain all atoms of the parent amino acid, e.g. - phosphoserine are copied as a whole with the modifications stripped off. - - Residues with missing backbone atoms and D-peptides are generally skipped and - treated as gaps. Missing Cbeta atoms in backbone are ok and reconstructed. - If all residues are skipped (e.g. Calpha traces), we report an error and - return an empty model. - - Residue numbers are set such that missing residue in gaps are honoured and - subsequent loop modelling can insert new residues without having to renumber. - **The numbering of residues starts for every chain with the value 1**. - - The returned :class:`ModellingHandle` stores the obtained raw model as well - as information about insertions and deletions in the gaps list. - - :param aln: Single alignment handle for raw model with single chain or - list of alignment handles for raw model with multiple chains. - :type aln: :class:`~ost.seq.AlignmentHandle` / :class:`~ost.seq.AlignmentList` - - :param include_ligands: True, if we wish to include ligands in the model. This - searches for ligands in all OST handles of the views - attached to the alignments. Ligands are identified - with the `ligand` property in the handle (set by OST - based on HET records) or by the chain name '_' (as set - in SMTL). All ligands are added to a new chain named - '_'. 
- :type include_ligands: :class:`bool` - - :param chain_names: Chains are named by a single chanacter taken from this. - :type chain_names: :class:`str` - - :param spdbv_style: True, if we need a model in the old SPDBV style. - :type spdbv_style: :class:`bool` - - :return: Raw (pseudo) model from the alignment. - :rtype: :class:`ModellingHandle` - - :raises: A :exc:`RuntimeError` when: - - - the alignments do not have two sequences - - the second sequence does not have an attached structure - - the residues of the template structure do not match with the - alignment sequence (note that you can set an "offset" (see - :meth:`~ost.seq.AlignmentHandle.SetSequenceOffset`) for the - template sequence (but not for the target)) - - the target sequence has a non-zero offset (cannot be honored as - the resulting model will always start its residue numbering at 1) - -.. autofunction:: BuildFromRawModel - -.. autofunction:: BuildSidechains - -.. autofunction:: MinimizeModelEnergy - -.. autofunction:: CheckFinalModel - -.. autofunction:: RunMolProbity - -.. autofunction:: RunMolProbityEntity - -.. autofunction:: ReportMolProbityScores - - -Closing Gaps --------------------------------------------------------------------------------- - -.. function:: SetupDefaultBackboneScorer(mhandle) - - Setup scorers and environment for medling with backbones. - This one is already tailored towards a certain modelling job. - - :param mhandle: The modelling handle this scorer should be dedicated to. - This will set the properties `backbone_scorer` and - `backbone_scorer_env` of `mhandle. - - :type mhandle: :class:`~promod3.modelling.ModellingHandle` - -.. autofunction:: CloseSmallDeletions - -.. function:: RemoveTerminalGaps(mhandle) - - Removes terminal gaps without modelling them (just removes them from the list - of gaps). This is useful for pipelines which lack the possibility to properly - model loops at the termini. - - :param mhandle: Modelling handle on which to apply change. 
- :type mhandle: :class:`ModellingHandle` - - :return: Number of gaps which were removed. - :rtype: :class:`int` - -.. autofunction:: MergeGapsByDistance - -.. autofunction:: FillLoopsByDatabase - -.. autofunction:: FillLoopsByMonteCarlo - -.. autofunction:: ModelTermini - -.. autofunction:: CloseLargeDeletions - -.. function:: CountEnclosedGaps(mhandle, gap) - CountEnclosedInsertions(mhandle, gap) - - Counts all gaps from `mhandle` which are fully enclosed by given `gap`. - This is either all gaps or only insertions. - - :param mhandle: Modelling handle on which to apply change. - :type mhandle: :class:`ModellingHandle` - :param gap: Gap defining range in which gaps are to be removed. - :type gap: :class:`StructuralGap` - - :return: Number of gaps. - :rtype: :class:`int` - -.. function:: ClearGaps(mhandle, gap) - - Removes all gaps from `mhandle` which are fully enclosed by given `gap`. - - :param mhandle: Modelling handle on which to apply change. - :type mhandle: :class:`ModellingHandle` - :param gap: Gap defining range in which gaps are to be removed. - :type gap: :class:`StructuralGap` - - :return: Index of next gap in mhandle.gaps after removal. - Returns -1 if last gap was removed or no gaps in *mhandle*. - :rtype: :class:`int` - - :raises: A :exc:`RuntimeError` if any gap in mhandle.gaps is only partially - enclosed by given gap. - -.. function:: MergeGaps(mhandle, index) - - Merges two gaps `mhandle.gaps[index]` and `mhandle.gaps[index+1]`. - The residues in between the gaps are removed from `mhandle.model` and added - to the new `mhandle.gaps[index]`. - - :param mhandle: Modelling handle on which to apply change. - :type mhandle: :class:`ModellingHandle` - :param index: Index of gap to merge with next one. - :type index: :class:`int` - - :raises: A :exc:`RuntimeError` if indices out of range or if trying to merge - gaps of different chains or an N-terminal gap with a C-terminal gap. 
- - -Modelling Handle class --------------------------------------------------------------------------------- - -.. class:: ModellingHandle - - Handles the result for structure model building and provides high-level methods - to turn an initial raw model (see :func:`~promod3.modelling.BuildRawModel`) - into a complete protein model by removing any existing gaps. - - .. attribute:: model - - The resulting model. This includes one chain per target chain (in the same - order as the sequences in `seqres`) and (if they were included) a chain - named '_' for ligands. You can therefore access `model.chains` items and - `seqres` items with the same indexing and the optional ligand chain follows - afterwards. - - :type: :class:`~ost.mol.EntityHandle` - - .. attribute:: gaps - - List of gaps in the model that could not be copied from the template. These - gaps may be the result of insertions/deletions in the alignment or due to - missing or incomplete backbone coordinates in the template structure. - Gaps of different chains are appended one after another. - - :type: :class:`StructuralGapList` - - .. attribute:: seqres - - List of sequences with one :class:`~ost.seq.SequenceHandle` for each chain - of target protein. - - :type: :class:`~ost.seq.SequenceList` - -Gap classes --------------------------------------------------------------------------------- - -.. class:: StructuralGap(before, after, seq) - - Describes a structural gap, i.e. a loop to be modeled. The gap may either be - terminal or between two defined regions. The gap stores information of the - last residue before and the first residue after the gap as well as the - sequence of gap. Gaps at the N- and C-terminals can be defined by passing - invalid residue handles to `before` or `after`. 
- - :param before: Fills :attr:`before` - :type before: :class:`ost.mol.ResidueHandle` - :param after: Fills :attr:`after` - :type after: :class:`ost.mol.ResidueHandle` - :param seq: Fills :attr:`seq` - :type seq: :class:`str` - - :raises: A :exc:`RuntimeError` if both residues are invalid or when both - are valid and: - - - residues are from different chains (if both valid) - - `before` is located after `after` - - `seq` has a length which is inconsistent with the gap - - .. method:: GetChainIndex() - - :return: Index of chain, the gap is belonging to - :rtype: :class:`int` - - .. method:: GetChainName() - - :return: Name of chain, the gap is belonging to - :rtype: :class:`str` - - .. method:: GetChain() - - :return: Chain, the gap is belonging to - :rtype: :class:`ost.mol.ChainHandle` - - .. method:: IsNTerminal() - - :return: True, iff gap is N-terminal (i.e. :attr:`before` is invalid - and :attr:`after` is valid) - :rtype: :class:`bool` - - .. method:: IsCTerminal() - - :return: True, iff gap is C-terminal (i.e. :attr:`before` is valid - and :attr:`after` is invalid) - :rtype: :class:`bool` - - .. method:: IsTerminal() - - :return: True, iff gap is N- or C-terminal - :rtype: :class:`bool` - - .. method:: ShiftCTerminal() - - Try to shift gap by one position towards C-terminal. Only possible if new - gap is not terminal and it doesn't try to shift the gap past another gap. - - :return: True, iff shift succeeded (gap is only updated in that case) - :rtype: :class:`bool` - - .. method:: ExtendAtNTerm() - - Try to extend gap at N-terminal end of gap. - Only possible if the gap is not at N-terminal and it doesn't try to - extend the gap past another gap. - - :return: True, iff extend succeeded (gap is only updated in that case) - :rtype: :class:`bool` - - .. method:: ExtendAtCTerm() - - Try to extend gap at C-terminal end of gap. - Only possible if the gap is not at C-terminal and it doesn't try to - extend the gap past another gap. 
- - :return: True, iff extend succeeded (gap is only updated in that case) - :rtype: :class:`bool` - - .. method:: GetLength() - - :return: Length of the gap. - :rtype: :class:`int` - - .. method:: Copy() - - :return: Copy of the gap. - :rtype: :class:`StructuralGap` - - .. attribute:: length - - Alias for :meth:`GetLength()` (read-only, :class:`int`) - - .. attribute:: seq - - Sequence string for the gap (read-only, :class:`str`) - - .. attribute:: before - - Residue before the gap (read-only, :class:`ost.mol.ResidueHandle`) - - .. attribute:: after - - Residue after the gap (read-only, :class:`ost.mol.ResidueHandle`) - - .. attribute:: full_seq - - Full sequence, including stem residues (read-only) - -.. class:: StructuralGapList - - Represents a :class:`list` of :class:`StructuralGap`. - - -Gap Extender classes --------------------------------------------------------------------------------- - -The extender classes work on a given :class:`StructuralGap` and provide an -Extend() function to propose new gaps for loop modelling. The function returns -False if no new extension possible. - -.. class:: GapExtender(gap, seqres) - - The extender cycles through the following steps: - - .. code-block:: none - - - - -- - -- - --- - --- - --- - ---- - ---- - ---- - ---- - - :param gap: The gap which will be extended by :meth:`Extend`. - :param seqres: The full sequence of the chain, the gap is associated with. - :type gap: :class:`StructuralGap` - :type seqres: :class:`str` / :class:`ost.seq.SequenceHandle` - - :raises: An exception if a terminal gap is used to construct this. - - .. method:: Extend() - - Tries to extend *gap*. - - :return: False, if the *gap* cannot be extended any further. This happens - if it reaches a terminal or another insertion gap. - Otherwise, the *gap* passed to the constructor is changed. - The gaps are extended with ascending length and will always have - valid termini. - :rtype: :class:`bool` - -.. 
class:: FullGapExtender(gap, seqres, max_length=-1) - - Cycles as GapExtender, but continues even if another gap was encountered. - - :param gap: The gap which will be extended by :meth:`Extend`. - :param seqres: The full sequence of the chain, the gap is associated with. - :param max_length: - If -1, all possible non-terminal gaps are returned. - - If >= 0, this restricts the max. gap-length - (w/o termini) producable by :meth:`Extend`. - :type gap: :class:`StructuralGap` - :type seqres: :class:`str` / :class:`ost.seq.SequenceHandle` - :type max_length: :class:`int` - - :raises: An exception if a terminal gap is used to construct this. - - .. method:: Extend() - - Tries to extend *gap*. - - :return: False, if the *gap* cannot be extended without exceeding *max_length*. - Otherwise, the *gap* passed to the constructor is changed. - The gaps are extended with ascending length and will always have - valid termini. - :rtype: :class:`bool` - -.. class:: ScoringGapExtender(gap, extension_penalty, penalties, seqres,\ - max_length=-2) - - The extender scores possible gap extensions and returns them in order of - their score when :meth:`Extend` is called. - The score is penalized according to length and according to certain (well - conserved) regions in the structure as defined by *penalties*. - score = num_gap_extensions * `extension_penalty` + sum( `penalties` [i] ) - (i = resnum - 1 of residues in extension) - - :param gap: The gap which will be extended by :meth:`Extend`. - :type gap: :class:`StructuralGap` - :param extension_penalty: Penalty for length of gap. - :type extension_penalty: :class:`float` - :param penalties: Penalty for each residue added to gap. - :type penalties: :class:`list` of :class:`float` - :param seqres: The full sequence of the chain, the gap is associated with. - :type seqres: :class:`str` / :class:`ost.seq.SequenceHandle` - :param max_length: - - If -2, :class:`GapExtender` is used instead of :class:`FullGapExtender` - (i.e. 
it stops at gaps and termini). - - If -1, all possible non-terminal gaps are returned. - - If >= 0, this restricts the max. gap-length (w/o termini) - producable by :meth:`Extend`. - :type max_length: :class:`int` - - :raises: An exception if a terminal gap is used to construct this. - - .. method:: Extend() - - Tries to extend *gap*. - - :return: False, if the gap cannot be extended any further. - Otherwise, *gap* is changed and returned in ascending score. - The updated *gap* will always have valid termini. - :rtype: :class:`bool` - -.. class:: ShiftExtension(n_num, c_num) - - Implements the underlying extension scheme of the :class:`GapExtender`. - It is not associated to any structural data, it just spits out the - residue numbers according to the extension scheme described above. - - :param n_num: N residue number to start with - :param c_num: C residue number to start with - :type n_num: :class:`int` - :type c_num: :class:`int` - - .. method:: Extend() - - :returns: The next residue numbers for n_stem and c_stem - :rtype: :class:`tuple` - - -Detecting ring punches --------------------------------------------------------------------------------- - -.. autofunction:: GetRings - -.. autofunction:: GetRingPunches - -.. autofunction:: HasRingPunches - -.. autofunction:: FilterCandidates +The various steps involved in protein modelling are described here: -.. autofunction:: FilterCandidatesWithSC +.. toctree:: + :maxdepth: 2 -.. LocalWords: currentmodule promod aln AttachView BuildRawModel pdb calpha -.. LocalWords: ModellingHandle StructuralGapList modelling Modelling os ost -.. LocalWords: tempfile io LoadAlignment LoadPDB fh fn API -.. LocalWords: modelling phosphoserine param exc RuntimeError automethod -.. 
LocalWords: CloseSmallDeletions + pipeline + model_checking + gap_handling + loop_candidates + loop_closing + monte_carlo + \ No newline at end of file diff --git a/loop/doc/helper_classes.rst b/modelling/doc/loop_candidates.rst similarity index 66% rename from loop/doc/helper_classes.rst rename to modelling/doc/loop_candidates.rst index 967065aa..584e485d 100644 --- a/loop/doc/helper_classes.rst +++ b/modelling/doc/loop_candidates.rst @@ -1,130 +1,31 @@ -Helper Classes +Handling Loop Candidates ================================================================================ -.. currentmodule:: promod3.loop +.. currentmodule:: promod3.modelling -This chapter describes helper classes, that are somehow related to the -functionality in the loop module. +For convenience, we provide the :class:`LoopCandidates` class as a container of +loop candidates with consistent length and sequence among them. It can either be +filled manually or generated using static filling functions using the +functionality from the :class:`~promod3.loop.FragDB` or Monte Carlo algorithms. +Once it contains candidates you can apply closing, scoring or clustering +algorithms on all loop candidates. Example: -Psipred --------------------------------------------------------------------------------- - - - - -.. class:: PsipredPrediction - - A container for the secondary structure prediction by Psipred. - - .. method:: PsipredPrediction() - - Constructs empty container - - .. method:: PsipredPrediction(prediction, confidence) - - Constructs container with given content - - :param prediction: Secondary structure prediction as element in ['H','E','C'] - :param confidence: Confidence of prediction as element in [0,9] - - :type prediction: :class:`list` - :type confidence: :class:`list` - - :raises: :exc:`~exceptions.RuntimeError` if size of **prediction** and - **confidence** are inconsistent or if they contain an invalid - element - - .. 
method:: FromHHM(filename) - - Static function to Load a :class:`PsipredPrediction` object from hhm file, - as they are provided by the hhsearch suite - - :param filename: Name of file - :type filename: :class:`str` - - .. method:: FromHoriz(filename) - - Static function to Load a :class:`PsipredPrediction` object from horiz file, - as they are produced by the psipred executable - - :param filename: Name of file - :type filename: :class:`str` - - .. method:: Add(prediction, confidence) - - Adds and appends a single residue psipred prediction at the end - - :param prediction: Prediction, must be one in ['H','E','C'] - :param confidence: Confidence of prediction, must be in [0,9] - - :type prediction: :class:`str` - :type confidence: :class:`int` - - :raises: :exc:`~exceptions.RuntimeError` if input contains invalid elements - - .. method:: Extract(from, to) - - Extracts content and returns a sub-:class:`PsipredPrediction` with range **from** - to **to**, not including **to** itself - - :param from: Idx to start - :param to: Idx to end - - :type from: :class:`int` - :type to: :class:`int` - - :returns: :class:`PsipredPrediction` with the specified range +.. literalinclude:: ../../../tests/doc/scripts/modelling_loop_candidates.py - :raises: :exc:`~exceptions.RuntimeError` if **from** or **to** are invalid - - - - .. method:: GetPrediction(idx) - - :param idx: Index to get prediction from - :type idx: :class:`int` - :returns: Psipred prediction at pos **idx** - :raises: :exc:`~exceptions.RuntimeError` if **idx** is invalid - - .. method:: GetConfidence(idx) - - :param idx: Index to get confidence from - :type idx: :class:`int` - :returns: Psipred confidence at pos **idx** - :raises: :exc:`~exceptions.RuntimeError` if **idx** is invalid - - .. method:: GetPredictions() - - Get all the predictions in the container - - :returns: :class:`list` containing all the predictions in the container - - - .. 
method:: GetConfidences() - - Get all the confidences in the container - - :returns: :class:`list` containing all the confidences in the container - - - .. method:: __len__() - - :returns: Number of elements in container - - - -Handling several loops at once +The LoopCandidate class -------------------------------------------------------------------------------- - - .. class:: LoopCandidate(backbone_list) - Object representing a possible configuration of the backbone of a particular loop being modelled. Apart from the positions, stored in a :class:`BackboneList`, it also has attributes storing different scores associated with the candidate (will be set by calling the corresponding scoring function in :class:`LoopCandidates`). + Object representing a possible configuration of the backbone of a particular + loop being modelled. Apart from the positions, stored in a + :class:`~promod3.loop.BackboneList`, it also has attributes storing different + scores associated with the candidate (will be set by calling the corresponding + scoring function in :class:`LoopCandidates`). :param backbone_list: Positions of the backbone atoms - :type backbone_list: :class:`BackboneList` + :type backbone_list: :class:`~promod3.loop.BackboneList` .. method:: CARMSD(other) @@ -146,7 +47,7 @@ Handling several loops at once The backbone list containing the positions of the backbone atoms - :type: :class:`BackboneList` + :type: :class:`~promod3.loop.BackboneList` .. attribute:: clash_score @@ -212,20 +113,16 @@ Handling several loops at once :type: :class:`float` -.. class:: LoopCandidates(seq) - - The *LoopCandidates* is a helper class used as a container for - :class:`LoopCandidate` objects enforcing consistent length - and sequence among them. It can either be filled manually or - generated using static filling functions using the functionality - from the :class:`FragDB` or Monte Carlo algorithms. 
Once it contains - candidates you can apply closing, scoring or clustering algorithms on the content. - Note, that you have to attach a scorer object before any scoring action. +The LoopCandidates class +-------------------------------------------------------------------------------- - :param seq: The sequence being enforced for all candidates +.. class:: LoopCandidates(seq) - :type sequence: :class:`str` + Initializes an empty container of loop candidates. Candidates can be accessed + and iterated as if it was a :class:`list` of :class:`LoopCandidate`. + :param seq: The sequence being enforced for all candidates + :type seq: :class:`str` .. staticmethod:: FillFromDatabase(n_stem, c_stem, seq, frag_db, \ structural_db, extended_search=False) @@ -245,8 +142,8 @@ Handling several loops at once :type n_stem: :class:`ost.mol.ResidueHandle` :type c_stem: :class:`ost.mol.ResidueHandle` :type seq: :class:`str` - :type frag_db: :class:`FragDB` - :type structural_db: :class:`StructureDB` + :type frag_db: :class:`~promod3.loop.FragDB` + :type structural_db: :class:`~promod3.loop.StructureDB` :type extended_search: :class:`bool` :returns: A list of loop candidates @@ -276,7 +173,7 @@ Handling several loops at once For every monte carlo run, the random number generator gets refreshed and this seed gets increased by 1. - :type initial_bb: :class:`BackboneList` + :type initial_bb: :class:`~promod3.loop.BackboneList` :type seq: :class:`str` :type num_loops: :class:`int` :type steps: :class:`int` @@ -325,33 +222,32 @@ Handling several loops at once match those of *n_stem* and *c_stem*). CCD (cyclic coordinate descent, see :class:`CCD`) is an iterative minimization algorithm. - If *torsion_sampler* or *torsion_samplers* is given, it is used at each step - of the closing to calculate the probability of the proposed move, which is - then accepted or not depending on a metropolis criterium. 
+ If *torsion_sampler* is given, it is used at each step of the closing to + calculate the probability of the proposed move, which is then accepted or + not depending on a metropolis criterion. :param n_stem: Residue defining the n-stem positions every candidate should match. See :meth:`~CCD.CCD()`. :param c_stem: Residue defining the c-stem positions every candidate should match. See :meth:`~CCD.CCD()`. - :param torsion_sampler: The torsion sampler - :param torsion_samplers: A list containing one torsion sampler for each - residue in the loop - :param max_iterations: Maximum number of iteration - :param rmsd_cutoff: Cutoff in stem residue RMSD used to determine - convergence - :param keep_non_converged: Whether to keep loop candidates for which the - closing did not converge - :param random_seed: seed for random number generator used to - accept/reject moves in CCD algorithm - - :type n_stem: :class:`ost.mol.ResidueHandle` - :type c_stem: :class:`ost.mol.ResidueHandle` - :type torsion_sampler: :class:`TorsionSampler` - :type torsion_samplers: :class:`list` of :class:`TorsionSampler` - :type max_iterations: :class:`int` - :type rmsd_cutoff: :class:`float` - :type keep_non_converged: :class:`bool` - :type random_seed: :class:`int` + :param torsion_sampler: A torsion sampler (used for all residues) or a list + of samplers (one per residue). 
+ :param max_iterations: Maximum number of iterations + :param rmsd_cutoff: Cutoff in stem residue RMSD used to determine + convergence + :param keep_non_converged: Whether to keep loop candidates for which the + closing did not converge + :param random_seed: seed for random number generator used to + accept/reject moves in CCD algorithm + + :type n_stem: :class:`ost.mol.ResidueHandle` + :type c_stem: :class:`ost.mol.ResidueHandle` + :type torsion_sampler: :class:`~promod3.loop.TorsionSampler` / :class:`list` + of :class:`~promod3.loop.TorsionSampler` + :type max_iterations: :class:`int` + :type rmsd_cutoff: :class:`float` + :type keep_non_converged: :class:`bool` + :type random_seed: :class:`int` .. method:: ApplyKIC(n_stem, c_stem, pivot_one, pivot_two, pivot_three) @@ -394,8 +290,8 @@ Handling several loops at once Add a loop candidate to the list of candidates from a list of backbone positions - :param bb_list: The backbone list - :type bb_list: :class:`BackboneList` + :param bb_list: The backbone list + :type bb_list: :class:`~promod3.loop.BackboneList` :raises: :exc:`~exceptions.RuntimeError` If sequence of *bb_list* is not consistent with internal sequence @@ -404,8 +300,8 @@ Handling several loops at once Remove a loop candidate from the list of candidates. - :param index: The index of the candidate that will be removed - :type index: :class:`int` + :param index: The index of the candidate that will be removed + :type index: :class:`int` .. 
method:: CalculateScores(scorer, key, start_resnum, chain_index=0) @@ -440,5 +336,3 @@ Handling several loops at once :type linear_weights: :class:`dict` :type start_resnum: :class:`int` :type chain_index: :class:`int` - - \ No newline at end of file diff --git a/loop/doc/loop_closing.rst b/modelling/doc/loop_closing.rst similarity index 62% rename from loop/doc/loop_closing.rst rename to modelling/doc/loop_closing.rst index c6d34eca..96b2e418 100644 --- a/loop/doc/loop_closing.rst +++ b/modelling/doc/loop_closing.rst @@ -1,54 +1,41 @@ -Loop Closing +Fitting Loops Into Gaps ================================================================================ -.. currentmodule:: promod3.loop - - -Loops, represented as :class:`BackboneList` objects, often need to undergo -conformational changes to fit into gaps defined by stem residues. -|project| implements two algorithms performing this task. - - * There is cyclic coordinate descent (CCD) [canutescu2003]_ - - - * and kinematic closure (KIC) [mandell2009]_ - - -In case of small gaps or small issues in the :class:`BackboneList` you might -also consider the :class:`BackboneRelaxer`. +.. currentmodule:: promod3.modelling +Loops often need to undergo conformational changes to fit into gaps defined by +stem residues. |project| implements two algorithms performing this task: + * Cyclic coordinate descent (CCD) [canutescu2003]_ + * Kinematic closure (KIC) [mandell2009]_ +In case of small gaps or small issues in the loop you might also consider the +:class:`BackboneRelaxer`. CCD -------------------------------------------------------------------------------- -The |project| implementation of the cyclic coordinate descent first superposes -the n-stem of the input :class:`BackboneList` with the provided n-stem -positions. In every iteration of the algorithm, one residue of the -:class:`BackboneList` gets randomly selected. 
If the residue is not the n-stem, -the ideal phi angle, that minimizes the RMSD between the :class:`BackboneList` -c-stem and the target c-stem positions gets calculated and directly applied. -The same thing is done for the psi angle if the selected residue is not the -c-stem. Iterations continue until a c-stem RMSD threshold is reached or number -of iterations hit a limit. -By performing CCD, unfavourable backbone dihedral pairs can be introduced. -It is therefore optionally possible to use :class:`TorsionSampler` to guide the -iterative process. -In this case, the algorithm calculates the probablity of observing the dihedral -pair before and after performing the phi/psi update. If the fraction -after/before is smaller than a random number in range [0;1[, the proposed -dihedral pair gets rejected and the next iteration starts. Please note, that -this increases the probability of non-convergence. - +The |project| implementation of the cyclic coordinate descent first superposes +the n-stem of the input loop with the provided n-stem positions. In every +iteration of the algorithm, we loop over all residues of the loop and find the +ideal phi/psi angles to minimize the RMSD between the c-stem and the target +c-stem positions. Iterations continue until a c-stem RMSD threshold is reached +or the number of iterations hits a limit. By performing CCD, unfavorable +backbone dihedral pairs can be introduced. It is therefore optionally possible +to use torsion samplers to guide the iterative process. In this case, the +algorithm calculates the probability of observing the dihedral pair before and +after performing the phi/psi update. If the fraction after/before is smaller +than a uniform random number in the range [0,1[, the proposed dihedral pair gets +rejected. Please note, that this increases the probability of non-convergence. .. class:: CCD Class, that sets up everything you need to perform a particular loop closing action. - .. 
method:: CCD(sequence, n_stem, c_stem, torsion_sampler, max_steps, rmsd_cutoff, seed) + .. method:: CCD(sequence, n_stem, c_stem, torsion_sampler, max_steps, \ + rmsd_cutoff, seed) All runs with this CCD object will be with application of torsion samplers to avoid moving into unfavourable regions of the backbone dihedrals. @@ -62,13 +49,13 @@ this increases the probability of non-convergence. If the residue after *c_stem* doesn't exist, the torsion sampler will use a default residue (ALA) and psi angle (-0.7854) to evaluate the last angle. - :param torsion_sampler: To extract probabilities for the analysis of the backbone - dihedrals. You either pass a list with :class:`TorsionSampler` - objects for every residue of the loop to be closed or a single - :class:`TorsionSampler`, that will be applied on all residues. + :param torsion_sampler: To extract probabilities for the analysis of the + backbone dihedrals. Either a list of torsion + samplers (one for every residue of the loop to + be closed) or a single one (used for all residues). :param max_steps: Maximal number of iterations - :param rmsd_cutoff: The algorithm stops as soon as the c_stem of the loop to be - closed has RMSD below the **c_stem** + :param rmsd_cutoff: The algorithm stops as soon as the c_stem of the loop to + be closed has RMSD below the *c_stem* :param seed: Seed of random number generator to decide whether new phi/psi pair should be accepted. @@ -76,7 +63,8 @@ this increases the probability of non-convergence. :type sequence: :class:`str` :type n_stem: :class:`ost.mol.ResidueHandle` :type c_stem: :class:`ost.mol.ResidueHandle` - :type torsion_sampler: :class:`TorsionSampler` / :class:`list` + :type torsion_sampler: :class:`~promod3.loop.TorsionSampler` / :class:`list` + of :class:`~promod3.loop.TorsionSampler` :type max_steps: :class:`int` :type rmsd_cutoff: :class:`float` :type seed: :class:`int` @@ -94,8 +82,8 @@ this increases the probability of non-convergence. 
:param n_stem: Residue defining the n_stem :param c_stem: Residue defining the c_stem :param max_steps: Maximal number of iterations - :param rmsd_cutoff: The algorithm stops as soon as the c_stem of the loop to be - closed has RMSD below the **c_stem** + :param rmsd_cutoff: The algorithm stops as soon as the c_stem of the loop to + be closed has RMSD below the given *c_stem* :type n_stem: :class:`ost.mol.ResidueHandle` :type c_stem: :class:`ost.mol.ResidueHandle` @@ -104,18 +92,18 @@ this increases the probability of non-convergence. .. method:: Close(bb_list) - Closes given **bb_list** with the settings set at initialization. - - :param bb_list: Loop to be closed + Closes given *bb_list* with the settings set at initialization. - :type bb_list: :class:`BackboneList` + :param bb_list: Loop to be closed + :type bb_list: :class:`~promod3.loop.BackboneList` - :returns: :class:`bool` Whether **rmsd_cutoff** has been reached + :return: Whether *rmsd_cutoff* has been reached + :rtype: :class:`bool` :raises: :exc:`~exceptions.RuntimeError` if the CCD object has been - initialized with :class:`TorsionSampler` support and the length of - the **bb_list** is not consistent with the initial sequence. - + initialized with :class:`~promod3.loop.TorsionSampler` support + and the length of the *bb_list* is not consistent with the initial + *sequence*. KIC @@ -149,33 +137,31 @@ at these pivot residues. Due to the internal mathematical formalism, up to :param pivot_two: Index of second pivot residue :param pivot_three: Index of third pivot residue - :type bb_list: :class:`BackboneList` + :type bb_list: :class:`~promod3.loop.BackboneList` :type pivot_one: :class:`int` :type pivot_two: :class:`int` :type pivot_three: :class:`int` - :returns: List of :class:`BackboneList` objects representing - the closed loops. There is a maximum of 16 entries. 
+ :return: List of closed loops (maximum of 16 entries) + :rtype: :class:`list` of :class:`~promod3.loop.BackboneList` :raises: :exc:`~exceptions.RuntimeError` in case of invalid pivot indices. - Relaxing Backbones -------------------------------------------------------------------------------- -In many cases one wants to quickly relax a :class:`BackboneList`. This -can be useful to close small gaps in the backbone or resolve the most -severe clashes. The :class:`BackboneRelaxer` internally sets up a topology -for the input :class:`BackboneList` based on the CHARMM27 forcefield. -Once setup, every :class:`BackboneList` of same length and sequence -can be relaxed by the relaxer. +In many cases one wants to quickly relax a loop. This can be useful to close +small gaps in the backbone or resolve the most severe clashes. The +:class:`BackboneRelaxer` internally sets up a topology for the input loop based +on the CHARMM27 forcefield. Once setup, every loop of the same length and +sequence can be relaxed by the relaxer. -.. class:: BackboneRelaxer(bb_list, [fix_nterm=True, fix_cterm=True]) +.. class:: BackboneRelaxer(bb_list, fix_nterm=True, fix_cterm=True) - Sets up a molecular mechanics topology for given **bb_list**. Every - :class:`BackboneList` of same length and sequence can then be + Sets up a molecular mechanics topology for given *bb_list*. Every + :class:`~promod3.loop.BackboneList` of same length and sequence can then be relaxed. :param bb_list: Basis for topology creation @@ -184,16 +170,17 @@ can be relaxed by the relaxer. :param fix_cterm: Whether c-terminal backbone positions should kept rigid during relaxation. - :type bb_list: :class:`BackboneList` + :type bb_list: :class:`~promod3.loop.BackboneList` :type fix_nterm: :class:`bool` :type fix_cterm: :class:`bool` -.. class:: BackboneRelaxer(bb_list, density, resolution, [fix_nterm=True, fix_cterm=True]) +.. 
class:: BackboneRelaxer(bb_list, density, resolution, fix_nterm=True, \ + fix_cterm=True) - Sets up a molecular mechanics topology for given **bb_list** including - density information. Every :class:`BackboneList` of same length and - sequence can then be relaxed. + Sets up a molecular mechanics topology for given *bb_list* including + density information. Every :class:`~promod3.loop.BackboneList` of same length + and sequence can then be relaxed. :param bb_list: Basis for topology creation :param density: Density used as a target for the internal density force @@ -203,16 +190,16 @@ can be relaxed by the relaxer. :param fix_cterm: Whether c-terminal backbone positions should kept rigid during relaxation. - :type bb_list: :class:`BackboneList` + :type bb_list: :class:`~promod3.loop.BackboneList` :type density: :class:`ost.img.ImageHandle` :type resolution: :class:`float` :type fix_nterm: :class:`bool` :type fix_cterm: :class:`bool` - :raises: :exc:`~exceptions.RuntimeError` if size of **bb_list** is below 2 + :raises: :exc:`~exceptions.RuntimeError` if size of *bb_list* is below 2 - .. method:: AddNRestraint(idx, pos, [force_constant = 100000]) + .. method:: AddNRestraint(idx, pos, force_constant=100000) Adds harmonic position restraint for nitrogen atom at specified residue @@ -224,9 +211,9 @@ can be relaxed by the relaxer. :type pos: :class:`ost.geom.Vec3` :type force_constant: :class:`float` - :raises: :class:`RuntimeError` if idx is too large + :raises: :exc:`~exceptions.RuntimeError` if *idx* is too large - .. method:: AddCARestraint(idx, pos, [force_constant = 100000]) + .. method:: AddCARestraint(idx, pos, force_constant=100000) Adds harmonic position restraint for CA atom at specified residue @@ -238,9 +225,9 @@ can be relaxed by the relaxer. :type pos: :class:`ost.geom.Vec3` :type force_constant: :class:`float` - :raises: :class:`RuntimeError` if idx is too large + :raises: :exc:`~exceptions.RuntimeError` if *idx* is too large - .. 
method:: AddCBRestraint(idx, pos, [force_constant = 100000]) + .. method:: AddCBRestraint(idx, pos, force_constant=100000) Adds harmonic position restraint for CB atom at specified residue, doesn't do anything if specified residue is a glycine @@ -253,9 +240,9 @@ can be relaxed by the relaxer. :type pos: :class:`ost.geom.Vec3` :type force_constant: :class:`float` - :raises: :class:`RuntimeError` if idx is too large + :raises: :exc:`~exceptions.RuntimeError` if *idx* is too large - .. method:: AddCRestraint(idx, pos, [force_constant = 100000]) + .. method:: AddCRestraint(idx, pos, force_constant=100000) Adds harmonic position restraint for C atom at specified residue @@ -267,9 +254,9 @@ can be relaxed by the relaxer. :type pos: :class:`ost.geom.Vec3` :type force_constant: :class:`float` - :raises: :class:`RuntimeError` if idx is too large + :raises: :exc:`~exceptions.RuntimeError` if *idx* is too large - .. method:: AddORestraint(idx, pos, [force_constant = 100000]) + .. method:: AddORestraint(idx, pos, force_constant=100000) Adds harmonic position restraint for O atom at specified residue @@ -281,9 +268,9 @@ can be relaxed by the relaxer. :type pos: :class:`ost.geom.Vec3` :type force_constant: :class:`float` - :raises: :class:`RuntimeError` if idx is too large + :raises: :exc:`~exceptions.RuntimeError` if *idx* is too large - .. method:: Run(bb_list, [steps = 100, stop_criterion = 0.01]) + .. method:: Run(bb_list, steps=100, stop_criterion=0.01) Performs steepest descent on given BackboneList. @@ -293,18 +280,17 @@ can be relaxed by the relaxer. falls below that threshold, the relaxation aborts. 
- :type bb_list: :class:`BackboneList` - :type steps: :class:`steps` + :type bb_list: :class:`~promod3.loop.BackboneList` + :type steps: :class:`int` :type stop_criterion: :class:`float` - :returns: Forcefield energy upon relaxation + :return: Forcefield energy upon relaxation + :rtype: :class:`float` - :raises: :exc:`~exceptions.RuntimeError` if **bb_list** has not the same + :raises: :exc:`~exceptions.RuntimeError` if *bb_list* has not the same size or sequence as the initial one. - - .. [canutescu2003] Canutescu AA and Dunbrack RL Jr. (2003). Cyclic coordinate descent: A robotics algorithm for protein loop closure. Protein Sci. 12(5):963–972. -.. [mandell2009] Mandell DJ, Coutsias EA and Kortemme T (2009). Sub-angstrom accuracy in protein loop reconstruction by robotics-inspired conformational sampling. Nat Methods. 6(8):551-2. \ No newline at end of file +.. [mandell2009] Mandell DJ, Coutsias EA and Kortemme T (2009). Sub-angstrom accuracy in protein loop reconstruction by robotics-inspired conformational sampling. Nat Methods. 6(8):551-2. diff --git a/modelling/doc/model_checking.rst b/modelling/doc/model_checking.rst new file mode 100644 index 00000000..97d7f69a --- /dev/null +++ b/modelling/doc/model_checking.rst @@ -0,0 +1,29 @@ +Model Checking +================================================================================ + +.. currentmodule:: promod3.modelling + +This chapter describes additional functionality to check models. Some of this +functionality is used within the modelling pipeline. + +Detecting Ring Punches +-------------------------------------------------------------------------------- + +.. autofunction:: GetRings + +.. autofunction:: GetRingPunches + +.. autofunction:: HasRingPunches + +.. autofunction:: FilterCandidates + +.. autofunction:: FilterCandidatesWithSC + +Model Checking With MolProbity +-------------------------------------------------------------------------------- + +.. autofunction:: RunMolProbity + +.. 
autofunction:: RunMolProbityEntity + +.. autofunction:: ReportMolProbityScores \ No newline at end of file diff --git a/loop/doc/monte_carlo.rst b/modelling/doc/monte_carlo.rst similarity index 75% rename from loop/doc/monte_carlo.rst rename to modelling/doc/monte_carlo.rst index f89e1cc1..e1ac9e71 100644 --- a/loop/doc/monte_carlo.rst +++ b/modelling/doc/monte_carlo.rst @@ -1,7 +1,7 @@ -Monte Carlo Sampling +Generating Loops De Novo ================================================================================ -.. currentmodule:: promod3.loop +.. currentmodule:: promod3.modelling The Monte Carlo capabilities of |project| are mainly targeted at generating de novo structure candidates for loops or N-/C-Termini. Every iteration of the @@ -24,9 +24,9 @@ provided by |project|. A convenient function to perform Monte Carlo sampling using a simulated annealing scheme. In every iteration, a new loop conformation gets proposed by - the provided **sampler** and closed by the **closer**. Upon scoring, this new + the provided *sampler* and closed by the *closer*. Upon scoring, this new conformation gets accepted/rejected using a metropolis criterion based on the - temperature given by the **cooler** + temperature given by the *cooler* => acceptance probability: exp(-delta_score/T). The result is stored in *bb_list* and is either the lowest energy conformation ever encountered or the last accepted proposal. @@ -42,7 +42,7 @@ provided by |project|. :param bb_list: The chosen conformation gets stored here. :param initialize: Whether a new bb_list should be generated as starting point, based on the samplers Initialize function. - The input **bb_list** gets used otherwise. + The input *bb_list* gets used otherwise. :param seed: Seed for internal random number generator. :param lowest_energy_conformation: If true, we choose the lowest scoring conformation of the trajectory. Otherwise, @@ -53,7 +53,7 @@ provided by |project|. 
:type scorer: :ref:`mc-scorer-object` :type cooler: :ref:`mc-cooler-object` :type steps: :class:`int` - :type bb_list: :class:`BackboneList` + :type bb_list: :class:`~promod3.loop.BackboneList` :type initialize: :class:`bool` :type seed: :class:`int` :type lowest_energy_conformation: :class:`bool` @@ -73,10 +73,11 @@ The sampler objects can be used to generate initial conformations and propose new conformations for a sequence of interest. They build the basis for any Monte Carlo sampling pipeline. -.. class:: PhiPsiSampler(sequence,torsion_sampler,[n_stem_phi=-1.0472,c_stem_psi=-0.78540,prev_aa='A',next_aa='A',seed=0]) +.. class:: PhiPsiSampler(sequence, torsion_sampler, n_stem_phi=-1.0472,\ + c_stem_psi=-0.78540, prev_aa='A', next_aa='A', seed=0) The PhiPsiSampler randomly draws and sets phi/psi dihedral angles from - a distribution provided by the **torsion_sampler**. + a distribution provided by the *torsion_sampler*. :param sequence: Sequence that should be sampled :param torsion_sampler: Sampler, from which the phi/psi pairs are drawn. It @@ -86,23 +87,23 @@ for any Monte Carlo sampling pipeline. :param n_stem_phi: Phi angle of the n_stem. This angle is not defined in the sampling region. If the first residue gets selected for changing the dihedral angles, it draws a psi angle - given **n_stem_phi**. + given *n_stem_phi*. :param c_stem_psi: Psi angle of c_stem. This angle is not defined in the sampling region. If the last residue gets selected for changing the dihedral angles, it draws a phi angle - given **c_stem_psi**. + given *c_stem_psi*. :param prev_aa: This parameter is necessary to extract the according histogram index for the first residue from the - **torsion_sampler**. (Remember: The torsion sampler + *torsion_sampler*. (Remember: The torsion sampler always considers triplets) :param next_aa: This parameter is necessary to extract the according histogram index for the last residue from the - **torsion_sampler**. 
(Remember: The torsion sampler + *torsion_sampler*. (Remember: The torsion sampler always considers triplets) :param seed: Seed for the internal random number generators. :type sequence: :class:`str` - :type torsion_sampler: :class:`TorsionSampler` + :type torsion_sampler: :class:`~promod3.loop.TorsionSampler` :type n_stem_phi: :class:`float` :type c_stem_psi: :class:`float` :type prev_aa: :class:`str` @@ -113,11 +114,11 @@ for any Monte Carlo sampling pipeline. .. method:: Initialize(bb_list) - Sets up a new :class:`BackboneList` by randomly drawing phi/psi dihedral - angles. + Sets up a new :class:`~promod3.loop.BackboneList` by randomly drawing + phi/psi dihedral angles. :param bb_list: The newly created conformation gets stored in here - :type bb_list: :class:`BackboneList` + :type bb_list: :class:`~promod3.loop.BackboneList` .. method:: ProposeStep(actual_positions, proposed_position) @@ -129,19 +130,20 @@ for any Monte Carlo sampling pipeline. :param actual_positions: Conformation to be changed :param proposed_positions: Changed conformation gets stored in here - :type actual_positions: :class:`BackboneList` - :type proposed_positions: :class:`BackboneList` + :type actual_positions: :class:`~promod3.loop.BackboneList` + :type proposed_positions: :class:`~promod3.loop.BackboneList` - :raises: :exc:`~exceptions.RuntimeError` If size of **actual_positions** + :raises: :exc:`~exceptions.RuntimeError` If size of *actual_positions* is not consistent with the internal sequence. Note, that the sequence itself doesn't get checked for efficiency reasons. -.. class:: SoftSampler(sequence,torsion_sampler,max_dev,[n_stem_phi=-1.0472,c_stem_psi=-0.78540,prev_aa='A',next_aa='A',seed=0]) +.. 
class:: SoftSampler(sequence, torsion_sampler, max_dev, n_stem_phi=-1.0472,\ + c_stem_psi=-0.78540, prev_aa='A', next_aa='A', seed=0) Instead of drawing completely new values for a residues phi/psi angles, - only one angle gets altered by a maximum value of **max_dev** in the + only one angle gets altered by a maximum value of *max_dev* in the SoftSampler. :param sequence: Sequence that should be sampled @@ -153,26 +155,26 @@ for any Monte Carlo sampling pipeline. value per sampling step. :param n_stem_phi: Phi angle of the n_stem. This angle is not defined in the sampling region. If the psi angle of the first - residue gets selected to be changed, **n_stem_phi** is + residue gets selected to be changed, *n_stem_phi* is used to calculate the phi/psi probability to estimate the acceptance probability. :param c_stem_psi: Psi angle of c_stem. This angle is not defined in the sampling region. If the phi angle of the last - residue gets selected to be changed, **c_stem_psi** is + residue gets selected to be changed, *c_stem_psi* is used to calculate the phi/psi probability to estimate the acceptance probability. :param prev_aa: This parameter is necessary to extract the according histogram index for the first residue from the - **torsion_sampler**. (Remember: The torsion sampler + *torsion_sampler*. (Remember: The torsion sampler always considers triplets) :param next_aa: This parameter is necessary to extract the according histogram index for the last residue from the - **torsion_sampler**. (Remember: The torsion sampler + *torsion_sampler*. (Remember: The torsion sampler always considers triplets) :param seed: Seed for the internal random number generators. :type sequence: :class:`str` - :type torsion_sampler: :class:`TorsionSampler` + :type torsion_sampler: :class:`~promod3.loop.TorsionSampler` :type n_stem_phi: :class:`float` :type c_stem_psi: :class:`float` :type prev_aa: :class:`str` @@ -183,17 +185,17 @@ for any Monte Carlo sampling pipeline. .. 
method:: Initialize(bb_list) - Sets up a new :class:`BackboneList` by randomly drawing phi/psi dihedral - angles. + Sets up a new :class:`~promod3.loop.BackboneList` by randomly drawing + phi/psi dihedral angles. :param bb_list: The newly created conformation gets stored in here - :type bb_list: :class:`BackboneList` + :type bb_list: :class:`~promod3.loop.BackboneList` .. method:: ProposeStep(actual_positions, proposed_position) In an iterative process, the SoftSampler randomly selects one of the possible dihedral angles in a conformation and changes it by a random value - in [-**max_dev**,**max_dev**]. The acceptance probability of this change is + in [-*max_dev*, *max_dev*]. The acceptance probability of this change is the fraction of the phi/psi probability before and after changing the single angle in the particular residue. There is a maximum of 100 iterations. It is therefore theoretically possible, that nothing happens @@ -202,40 +204,43 @@ for any Monte Carlo sampling pipeline. :param actual_positions: Conformation to be changed :param proposed_positions: Changed conformation gets stored in here - :type actual_positions: :class:`BackboneList` - :type proposed_positions: :class:`BackboneList` + :type actual_positions: :class:`~promod3.loop.BackboneList` + :type proposed_positions: :class:`~promod3.loop.BackboneList` - :raises: :exc:`~exceptions.RuntimeError` If size of **actual_positions** + :raises: :exc:`~exceptions.RuntimeError` If size of *actual_positions* is not consistent with the internal sequence. Note, that the sequence itself doesn't get checked for efficiency reasons. -.. class:: FragmentSampler(sequence, fraggers, [init_bb_list = BackboneList(sequence), sampling_start_index = 0, init_fragments = 3, seed = 0]) - - The FragmentSampler samples by replacing full fragments originating from a list - of :class:`Fragger` objects. 
The region, that actually gets sampled is - determined by **sampling_start_index** and number of :class:`Fragger` objects - being available. All parts not covered by any fragger remain rigid. - - :param sequence: Overall sequence - :param fraggers: A list of :class:`Fragger` objects. The first fragger - covers the region starting at the letter - **sampling_start_index** of the **sequence** and so on. - All fraggers must contain fragments of equal size. - :param init_bb_list: Initial conformation, that serves as a starting point for - sampling. The default gets constructed using the default - constructor of :class:`BackboneList` and results in - a helix. +.. class:: FragmentSampler(sequence, fraggers,\ + init_bb_list=BackboneList(sequence), + sampling_start_index=0, init_fragments=3, seed=0) + + The FragmentSampler samples by replacing full fragments originating from a + list of :class:`~promod3.loop.Fragger` objects. The region, that actually gets + sampled is determined by *sampling_start_index* and number of + :class:`~promod3.loop.Fragger` objects being available. All parts not covered + by any fragger remain rigid. + + :param sequence: Overall sequence + :param fraggers: A list of :class:`~promod3.loop.Fragger` objects. The first + fragger covers the region starting at the letter + *sampling_start_index* of the *sequence* and so on. + All fraggers must contain fragments of equal size. + :param init_bb_list: Initial conformation, that serves as a starting point for + sampling. The default gets constructed using the default + constructor of :class:`~promod3.loop.BackboneList` and + results in a helix. :param sampling_start_index: Defines the beginning of the region, that actually gets sampled. :param init_fragments: When calling the Initialize function, the positions get set - to the ones of **init_bb_list**. This is the number of + to the ones of *init_bb_list*. This is the number of fragments that gets randomly selected and inserted. 
:param seed: Seed for the internal random number generators :type sequence: :class:`str` :type fraggers: :class:`str` - :type init_bb_list: :class:`BackboneList` + :type init_bb_list: :class:`~promod3.loop.BackboneList` :type samplint_start_index: :class:`int` :type init_fragments: :class:`int` :type seed: :class:`int` @@ -243,12 +248,12 @@ for any Monte Carlo sampling pipeline. .. method:: Initialize(bb_list) - Sets up a new :class:`BackboneList` by setting the setting - bb_list = **init_bb_list** and randomly replace n fragments - with n = **init_fragments** + Sets up a new :class:`~promod3.loop.BackboneList` by setting + bb_list = *init_bb_list* and randomly replacing n fragments + with n = *init_fragments* :param bb_list: The newly created conformation gets stored in here - :type bb_list: :class:`BackboneList` + :type bb_list: :class:`~promod3.loop.BackboneList` .. method:: ProposeStep(actual_step, proposed_position) @@ -259,8 +264,8 @@ for any Monte Carlo sampling pipeline. :param actual_positions: Conformation to be changed :param proposed_positions: Changed conformation gets stored in here - :type actual_positions: :class:`BackboneList` - :type proposed_positions: :class:`BackboneList` + :type actual_positions: :class:`~promod3.loop.BackboneList` + :type proposed_positions: :class:`~promod3.loop.BackboneList` @@ -279,7 +284,7 @@ or simple stem superposition in case of terminal sampling. The CCDCloser applies the CCD algorithm to the sampled conformation to enforce the match between the conformations stem residue and - the stems given by the closer. The **torsion_sampler** is used to + the stems given by the closer. The *torsion_sampler* is used to avoid moving into unfavourable phi/psi ranges. :param n_stem: Defining stem positions the closed conformation @@ -289,16 +294,17 @@ or simple stem superposition in case of terminal sampling. should adapt. See :meth:`~CCD.CCD()`. :param sequence: Sequence of the conformation to be closed. 
- :param torsion_sampler: To enforce valid phi/psi ranges. Alternatively - you can also pass a list of :class:`TorsionSampler` - objects to assign a unique torsion sampler to - every residue of the conformation to be closed. + :param torsion_sampler: To enforce valid phi/psi ranges. Alternatively, you + can also pass a list of sampler objects to assign a + unique torsion sampler to every residue of the + conformation to be closed. :param seed: Seed for internal random generators. :type n_stem: :class:`ost.mol.ResidueHandle` :type c_stem: :class:`ost.mol.ResidueHandle` :type sequence: :class:`str` - :type torsion_sampler: :class:`TorsionSampler` + :type torsion_sampler: :class:`~promod3.loop.TorsionSampler` / :class:`list` + of :class:`~promod3.loop.TorsionSampler` :type seed: :class:`int` .. method:: Close(actual_positions,closed_positions) @@ -306,8 +312,8 @@ or simple stem superposition in case of terminal sampling. :param actual_positions: Conformation to be closed. :param closed_positions: Closed conformation gets stored in here. - :type actual_positions: :class:`BackboneList` - :type closed_positions: :class:`BackboneList` + :type actual_positions: :class:`~promod3.loop.BackboneList` + :type closed_positions: :class:`~promod3.loop.BackboneList` :returns: Whether CCD converged @@ -332,8 +338,8 @@ or simple stem superposition in case of terminal sampling. :param actual_positions: Conformation to be closed. :param closed_positions: Closed conformation gets stored in here. - :type actual_positions: :class:`BackboneList` - :type closed_positions: :class:`BackboneList` + :type actual_positions: :class:`~promod3.loop.BackboneList` + :type closed_positions: :class:`~promod3.loop.BackboneList` :returns: Whether CCD converged @@ -359,8 +365,8 @@ or simple stem superposition in case of terminal sampling. :param actual_positions: Conformation to be closed. :param closed_positions: Closed conformation gets stored in here. 
- :type actual_positions: :class:`BackboneList` - :type closed_positions: :class:`BackboneList` + :type actual_positions: :class:`~promod3.loop.BackboneList` + :type closed_positions: :class:`~promod3.loop.BackboneList` :returns: Whether KIC found a solution @@ -398,7 +404,7 @@ The scorer asses a proposed conformation and are intended to return a pseudo energy, the lower the better. -.. class:: LinearScorer(scorer,start_resnum,chain_index,weights) +.. class:: LinearScorer(scorer, start_resnum, chain_index, weights) The LinearScorer allows to combine the scores available from :class:`~scoring.BackboneOverallScorer` in a linear manner. @@ -426,7 +432,7 @@ energy, the lower the better. :param bb_list: Current loop conformation to be scored. - :type bb_list: :class:`BackboneList` + :type bb_list: :class:`~promod3.loop.BackboneList` :returns: A linear combination of the scores @@ -442,9 +448,9 @@ to their GetTemperature function. .. class:: ExponentialCooler(change_frequency, start_temperature, cooling_factor) - The exponential cooler starts with a given **start_temperature** and counts the - calls to its GetTemperature function. According to the **change_frequency**, - the returned temperature gets multiplied by the **cooling_factor**. + The exponential cooler starts with a given *start_temperature* and counts the + calls to its GetTemperature function. According to the *change_frequency*, + the returned temperature gets multiplied by the *cooling_factor*. :param change_frequency: Frequency to change temperature :param start_temperature: temperature to start with @@ -460,4 +466,4 @@ to their GetTemperature function. .. 
method:: Reset() - Sets current temperature back to **start_temperature** + Sets current temperature back to *start_temperature* diff --git a/modelling/doc/pipeline.rst b/modelling/doc/pipeline.rst new file mode 100644 index 00000000..06692d9e --- /dev/null +++ b/modelling/doc/pipeline.rst @@ -0,0 +1,226 @@ +Modelling Pipeline +================================================================================ + +.. currentmodule:: promod3.modelling + +A protein homology modelling pipeline has the following main steps: + +- Build a raw model from the template (see :func:`BuildRawModel` function) +- Perform loop modelling to close (or remove) all gaps (see functions + :func:`CloseSmallDeletions`, :func:`RemoveTerminalGaps`, + :func:`MergeGapsByDistance`, :func:`FillLoopsByDatabase`, + :func:`FillLoopsByMonteCarlo`, :func:`CloseLargeDeletions`) +- Build sidechains (see :func:`BuildSidechains` function) +- Minimize energy of final model using molecular mechanics + (see :func:`MinimizeModelEnergy` function) + +The last steps to go from a raw model to a final model can easily be executed +with the :func:`BuildFromRawModel` function. If you want to run and tweak the +internal steps, you can start with the following code and adapt it to your +purposes: + +.. _modelling_steps_example: + +.. literalinclude:: ../../../tests/doc/scripts/modelling_steps.py + +In the default pipeline above, we call :func:`FillLoopsByDatabase` multiple +times. First, we try to close "easy" gaps which require few extensions (we wish +to limit the damage we do on the template) and for which we have plenty of loop +candidates. If some gaps cannot be closed like this, we try less restrictive +options. This approach is helpful if neighboring gaps are close together and the +one closer to the C-terminus is easier to close. Several variants of the +pipeline were evaluated on 1752 target-template-pairs and this one worked best. 
+ +Build Raw Modelling Handle +-------------------------------------------------------------------------------- + +.. class:: ModellingHandle + + Handles the result for structure model building and provides high-level methods + to turn an initial raw model (see :func:`~promod3.modelling.BuildRawModel`) + into a complete protein model by removing any existing gaps. + + .. attribute:: model + + The resulting model. This includes one chain per target chain (in the same + order as the sequences in `seqres`) and (if they were included) a chain + named '_' for ligands. You can therefore access `model.chains` items and + `seqres` items with the same indexing and the optional ligand chain follows + afterwards. + + :type: :class:`~ost.mol.EntityHandle` + + .. attribute:: gaps + + List of gaps in the model that could not be copied from the template. These + gaps may be the result of insertions/deletions in the alignment or due to + missing or incomplete backbone coordinates in the template structure. + Gaps of different chains are appended one after another. + + :type: :class:`StructuralGapList` + + .. attribute:: seqres + + List of sequences with one :class:`~ost.seq.SequenceHandle` for each chain + of target protein. + + :type: :class:`~ost.seq.SequenceList` + + .. attribute:: backbone_scorer_env + + Backbone score environment attached to this handle. A default environment + can be set with :func:`SetupDefaultBackboneScorer`. Additional information + can be added to the environment before running the pipeline steps. + + :type: :class:`~promod3.scoring.BackboneScoreEnv` + + .. attribute:: backbone_scorer + + Backbone scorer container attached to this handle. A default set of scorers + can be initialized with :func:`SetupDefaultBackboneScorer`. + + :type: :class:`~promod3.scoring.BackboneOverallScorer` + + +.. 
function:: BuildRawModel(aln, include_ligands=False, chain_names=\ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz",\ + spdbv_style=False) + + Builds a raw (pseudo) model from the alignment. Can either take a single + alignment handle or an alignment handle list. Every list item is treated as a + single chain in the final raw model. + + Each alignment handle must contain exactly two sequences and the second + sequence is considered the template sequence, which must have a + :class:`~ost.mol.EntityView` attached. + + This is a basic protein core modelling algorithm that copies backbone + coordinates based on the sequence alignment. For matching residues, the + side chain coordinates are also copied. Gaps are ignored. Hydrogen and + deuterium atoms are not copied into the model. + + The function tries to reuse as much as possible from the template. Modified + residues are treated as follows: + + - Selenium methionine residues are converted to methionine + + - Side chains which contain all atoms of the parent amino acid, e.g. + phosphoserine are copied as a whole with the modifications stripped off. + + Residues with missing backbone atoms and D-peptides are generally skipped and + treated as gaps. Missing Cbeta atoms in backbone are ok and reconstructed. + If all residues are skipped (e.g. Calpha traces), we report an error and + return an empty model. + + Residue numbers are set such that missing residues in gaps are honoured and + subsequent loop modelling can insert new residues without having to renumber. + **The numbering of residues starts for every chain with the value 1**. + + The returned :class:`ModellingHandle` stores the obtained raw model as well + as information about insertions and deletions in the gaps list. + + :param aln: Single alignment handle for raw model with single chain or + list of alignment handles for raw model with multiple chains. 
+ :type aln: :class:`~ost.seq.AlignmentHandle` / :class:`~ost.seq.AlignmentList` + + :param include_ligands: True, if we wish to include ligands in the model. This + searches for ligands in all OST handles of the views + attached to the alignments. Ligands are identified + with the `ligand` property in the handle (set by OST + based on HET records) or by the chain name '_' (as set + in SMTL). All ligands are added to a new chain named + '_'. + :type include_ligands: :class:`bool` + + :param chain_names: Chains are named by a single character taken from this. + :type chain_names: :class:`str` + + :param spdbv_style: True, if we need a model in the old SPDBV style. + :type spdbv_style: :class:`bool` + + :return: Raw (pseudo) model from the alignment. + :rtype: :class:`ModellingHandle` + + :raises: A :exc:`RuntimeError` when: + + - the alignments do not have two sequences + - the second sequence does not have an attached structure + - the residues of the template structure do not match with the + alignment sequence (note that you can set an "offset" (see + :meth:`~ost.seq.AlignmentHandle.SetSequenceOffset`) for the + template sequence (but not for the target)) + - the target sequence has a non-zero offset (cannot be honored as + the resulting model will always start its residue numbering at 1) + +Modelling Steps +-------------------------------------------------------------------------------- + +.. autofunction:: BuildFromRawModel + +.. function:: SetupDefaultBackboneScorer(mhandle) + + Setup scorers and environment for meddling with backbones. + This one is already tailored towards a certain modelling job. 
+ The scorers added (with their respective keys) are: + + - "cb_packing": :class:`~promod3.scoring.CBPackingScorer` + - "cbeta": :class:`~promod3.scoring.CBetaScorer` + - "reduced": :class:`~promod3.scoring.ReducedScorer` + - "clash": :class:`~promod3.scoring.ClashScorer` + - "hbond": :class:`~promod3.scoring.HBondScorer` + - "ss_agreement": :class:`~promod3.scoring.SSAgreementScorer` + - "torsion": :class:`~promod3.scoring.TorsionScorer` + - "pairwise": :class:`~promod3.scoring.PairwiseScorer` + - "density": :class:`~promod3.scoring.DensityScorer` + + :param mhandle: The modelling handle this scorer should be dedicated to. + This will set the properties + :attr:`~ModellingHandle.backbone_scorer` and + :attr:`~ModellingHandle.backbone_scorer_env` of `mhandle`. + :type mhandle: :class:`~promod3.modelling.ModellingHandle` + +.. function:: IsBackboneScorerSet(mhandle) + + :return: True, if :attr:`~ModellingHandle.backbone_scorer` of `mhandle` is + set. + :rtype: :class:`bool` + :param mhandle: Modelling handle to check. + :type mhandle: :class:`ModellingHandle` + +.. function:: IsBackboneScorerEnvSet(mhandle) + + :return: True, if :attr:`~ModellingHandle.backbone_scorer_env` of `mhandle` is + set. + :rtype: :class:`bool` + :param mhandle: Modelling handle to check. + :type mhandle: :class:`ModellingHandle` + +.. function:: RemoveTerminalGaps(mhandle) + + Removes terminal gaps without modelling them (just removes them from the list + of gaps). This is useful for pipelines which lack the possibility to properly + model loops at the termini. + + :param mhandle: Modelling handle on which to apply change. + :type mhandle: :class:`ModellingHandle` + + :return: Number of gaps which were removed. + :rtype: :class:`int` + +.. autofunction:: CloseSmallDeletions + +.. autofunction:: MergeGapsByDistance + +.. autofunction:: FillLoopsByDatabase + +.. autofunction:: FillLoopsByMonteCarlo + +.. autofunction:: CloseLargeDeletions + +.. autofunction:: ModelTermini + +.. 
autofunction:: BuildSidechains + +.. autofunction:: MinimizeModelEnergy + +.. autofunction:: CheckFinalModel diff --git a/modelling/pymod/_molprobity.py b/modelling/pymod/_molprobity.py index d1fe777a..ad431058 100644 --- a/modelling/pymod/_molprobity.py +++ b/modelling/pymod/_molprobity.py @@ -7,7 +7,7 @@ import subprocess, tempfile, os def RunMolProbity(target_pdb, molprobity_bin=None): '''Run ``MolProbity`` from ``Phenix`` on a given PDB file. - MolProbity score computation: (taken from molprobity source code) + MolProbity score computation: (formula from molprobity source code) .. code-block:: python diff --git a/modelling/pymod/_pipeline.py b/modelling/pymod/_pipeline.py index f8fea315..70c20c5b 100644 --- a/modelling/pymod/_pipeline.py +++ b/modelling/pymod/_pipeline.py @@ -409,11 +409,12 @@ def BuildFromRawModel(mhandle, use_amber_ff=False, extra_force_fields=list()): modelling pipeline. For reproducibility, we recommend that you keep copies of custom pipelines. - If you wish to use a custom scorer, you can attach your own scorers to the - `mhandle`. In that case you need to make sure that `mhandle.backbone_scorer` - and `mhandle.backbone_scorer_env` are both set and are consistent. You can - also call :meth:`SetupDefaultBackboneScorer` and adapt the default scorer - for your purposes. + If you wish to adapt the scoring used during loop closing, you can set + :attr:`~ModellingHandle.backbone_scorer` and + :attr:`~ModellingHandle.backbone_scorer_env` of `mhandle`, but you must + ensure that both are set and are consistent. Alternatively, you can call + :func:`SetupDefaultBackboneScorer` and adapt the default scorer and score + environment for your purposes. If the function fails to close all gaps, it will produce a warning and return an incomplete model. 
@@ -423,8 +424,8 @@ def BuildFromRawModel(mhandle, use_amber_ff=False, extra_force_fields=list()): :type mhandle: :class:`ModellingHandle` :param use_amber_ff: if True, use the AMBER force field instead of the def. - CHARMM one (see :meth:`ost.mol.mm.LoadAMBERForcefield` - and :meth:`ost.mol.mm.LoadCHARMMForcefield`). + CHARMM one (see :func:`ost.mol.mm.LoadAMBERForcefield` + and :func:`ost.mol.mm.LoadCHARMMForcefield`). Both do a similarly good job without ligands (CHARMM slightly better), but you will want to be consistent with the optional force fields in `extra_force_fields`. diff --git a/scoring/doc/index.rst b/scoring/doc/index.rst index 1395633c..1dc397d1 100644 --- a/scoring/doc/index.rst +++ b/scoring/doc/index.rst @@ -20,5 +20,5 @@ Contents: .. toctree:: :maxdepth: 2 - Backbone Score Environment <backbone_score_env> - Backbone Scorers <backbone_scorers> + backbone_score_env + backbone_scorers -- GitLab