From ec6bde94f6f9a3d0c27798b55da0debfad649881 Mon Sep 17 00:00:00 2001 From: Gerardo Tauriello <gerardo.tauriello@unibas.ch> Date: Mon, 23 Jan 2017 17:55:51 +0100 Subject: [PATCH] Documented several undocumented features. --- modules/mol/alg/doc/molalg.rst | 99 ++++++++++- modules/mol/alg/pymod/superpose.py | 66 ++++--- modules/mol/base/doc/entity.rst | 5 +- modules/seq/alg/pymod/renumber.py | 4 +- modules/seq/base/doc/seq.rst | 272 ++++++++++++++++++++++++----- 5 files changed, 360 insertions(+), 86 deletions(-) diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst index a5796db6a..e702b49ac 100644 --- a/modules/mol/alg/doc/molalg.rst +++ b/modules/mol/alg/doc/molalg.rst @@ -636,6 +636,11 @@ The following function detects steric clashes in atomic structures. Two atoms ar :returns: true if the residue names are the same, false otherwise +Superposing structures +-------------------------------------------------------------------------------- + +.. autofunction:: Superpose + .. autofunction:: ParseAtomNames .. autofunction:: MatchResidueByNum @@ -646,7 +651,99 @@ The following function detects steric clashes in atomic structures. Two atoms ar .. autofunction:: MatchResidueByGlobalAln -.. autofunction:: Superpose +.. class:: SuperpositionResult + + .. attribute:: rmsd + + RMSD of the superposed entities. + + .. attribute:: view1 + view2 + + Two :class:`~ost.mol.EntityView` used in superposition (not set if methods + with :class:`~ost.geom.Vec3List` used). + + .. attribute:: transformation + + Transformation (:class:`~ost.geom.Mat4`) used to map :attr:`view1` onto + :attr:`view2`. + + .. attribute:: fraction_superposed + rmsd_superposed_atoms + ncycles + + For iterative superposition (:func:`IterativeSuperposeSVD`): fraction and + RMSD of atoms that were superposed with a distance below the given + threshold and the number of iteration cycles performed. + +.. 
method:: SuperposeSVD(view1, view2, apply_transform=True) + SuperposeSVD(list1, list2) + + Superposition of two sets of atoms minimizing RMSD using a classic SVD based + algorithm. + + Note that the atom positions in the view are taken blindly in the order in + which the atoms appear. + + :param view1: View on the model entity + :type view1: :class:`~ost.mol.EntityView` + :param view2: View on the reference entity + :type view2: :class:`~ost.mol.EntityView` + :param list1: List of atom positions for model entity + :type list1: :class:`~ost.geom.Vec3List` + :param list2: List of atom positions for reference entity + :type list2: :class:`~ost.geom.Vec3List` + :param apply_transform: If True, the superposition transform is applied to + the (full!) entity handle linked to *view1*. + :type apply_transform: :class:`bool` + + :return: An instance of :class:`SuperpositionResult`. + +.. method:: IterativeSuperposeSVD(view1, view2, max_iterations=5, \ + distance_threshold=3.0, apply_transform=True) + IterativeSuperposeSVD(list1, list2, max_iterations=5, \ + distance_threshold=3.0) + + Iterative superposition of two sets of atoms. In each iteration cycle, we + keep a fraction of atoms with distances below *distance_threshold* and get + the superposition considering only those atoms. + + Note that the atom positions in the view are taken blindly in the order in + which the atoms appear. + + :param view1: View on the model entity + :type view1: :class:`~ost.mol.EntityView` + :param view2: View on the reference entity + :type view2: :class:`~ost.mol.EntityView` + :param list1: List of atom positions for model entity + :type list1: :class:`~ost.geom.Vec3List` + :param list2: List of atom positions for reference entity + :type list2: :class:`~ost.geom.Vec3List` + :param max_iterations: Max. 
number of iterations to be performed + :type max_iterations: :class:`int` + :param distance_threshold: Distance threshold defining superposed atoms + :type distance_threshold: :class:`float` + :param apply_transform: If True, the superposition transform is applied to + the (full!) entity handle linked to *view1*. + :type apply_transform: :class:`bool` + + :return: An instance of :class:`SuperpositionResult`. + + :raises: Exception if atom counts do not match or if less than 3 atoms. + +.. method:: CalculateRMSD(view1, view2, transformation=geom.Mat4()) + + :return: RMSD of atom positions (taken blindly in the order in which the + atoms appear) in the two given views. + :rtype: :class:`float` + + :param view1: View on the model entity + :type view1: :class:`~ost.mol.EntityView` + :param view2: View on the reference entity + :type view2: :class:`~ost.mol.EntityView` + :param transformation: Optional transformation to apply on each atom position + of *view1*. + :type transformation: :class:`~ost.geom.Mat4` .. _traj-analysis: diff --git a/modules/mol/alg/pymod/superpose.py b/modules/mol/alg/pymod/superpose.py index 9af62d7fa..e41942d57 100644 --- a/modules/mol/alg/pymod/superpose.py +++ b/modules/mol/alg/pymod/superpose.py @@ -275,28 +275,25 @@ def MatchResidueByGlobalAln(ent_a, ent_b, atoms='all'): def Superpose(ent_a, ent_b, match='number', atoms='all', iterative=False, max_iterations=5, distance_threshold=3.0): """ Superposes the model entity onto the reference. To do so, two views are - created, returned with the result. **atoms** describes what goes into these - views and **match** the selection method. For superposition, - :func:`~ost.mol.alg.SuperposeSVD` is called. For matching, the following methods - are recognised: + created, returned with the result. *atoms* describes what goes into these + views and *match* the selection method. For superposition, + :func:`SuperposeSVD` or :func:`IterativeSuperposeSVD` are called (depending on + *iterative*). 
For matching, the following methods are recognised: - * ``number`` - select residues by residue number, includes **atoms**, calls - :func:`~ost.mol.alg.MatchResidueByNum` + * ``number`` - select residues by residue number, includes *atoms*, calls + :func:`MatchResidueByNum` - * ``index`` - select residues by index in chain, includes **atoms**, calls - :func:`~ost.mol.alg.MatchResidueByIdx` + * ``index`` - select residues by index in chain, includes *atoms*, calls + :func:`MatchResidueByIdx` * ``local-aln`` - select residues from a Smith/Waterman alignment, includes - **atoms**, calls :func:`~ost.mol.alg.MatchResidueByLocalAln` + *atoms*, calls :func:`MatchResidueByLocalAln` * ``global-aln`` - select residues from a Needleman/Wunsch alignment, includes - **atoms**, calls :func:`~ost.mol.alg.MatchResidueByGlobalAln` + *atoms*, calls :func:`MatchResidueByGlobalAln` - There is also an option to use **iterative** matching which allows for an - iterative approach to superposing two structures. **iterative** takes two - additional parameters, **max_iteration** and **distance_threshold**. - - :param ent_a: The model entity + :param ent_a: The model entity (superposition transform is applied on full + entity handle here) :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle` :param ent_b: The reference entity @@ -308,40 +305,39 @@ def Superpose(ent_a, ent_b, match='number', atoms='all', iterative=False, max_it :param atoms: The subset of atoms to be used in the superposition :type atoms: :class:`str`, :class:`list`, :class:`set` - :param max_iterations: They number of iterations that will be run during - iterative superposition + :param iterative: Whether or not to use iterative superposition. + :type iterative: :class:`bool` + + :param max_iterations: Max. 
number of iterations for + :func:`IterativeSuperposeSVD` + (only if *iterative* = True) :type max_iterations: :class:`int` - :param distance_threshold: The distance threshold between which two atoms - that will be used in the next superposition - iteration + :param distance_threshold: Distance threshold for + :func:`IterativeSuperposeSVD` + (only if *iterative* = True) :type distance_threshold: :class:`float` - :returns: An instance of :class:`SuperpositionResult`, containing members - - * ``rmsd`` - RMSD of the superposed entities - - * ``view1`` - First :class:`~ost.mol.EntityView` used - - * ``view2`` - Second :class:`~ost.mol.EntityView` used + :returns: An instance of :class:`SuperpositionResult`. """ - not_supported="Superpose called with unsupported matching request." + not_supported = "Superpose called with unsupported matching request." ## create views to superpose if match.upper() == 'NUMBER': view_a, view_b = MatchResidueByNum(ent_a, ent_b, atoms) elif match.upper() == 'INDEX': - view_a, view_b=MatchResidueByIdx(ent_a, ent_b, atoms) + view_a, view_b = MatchResidueByIdx(ent_a, ent_b, atoms) elif match.upper() == 'LOCAL-ALN': - view_a, view_b=_MatchResidueByAln(ent_a, ent_b, atoms, - ost.seq.alg.LocalAlign) + view_a, view_b = _MatchResidueByAln(ent_a, ent_b, atoms, + ost.seq.alg.LocalAlign) elif match.upper() == 'GLOBAL-ALN': - view_a, view_b=_MatchResidueByAln(ent_a, ent_b, atoms, - ost.seq.alg.GlobalAlign) + view_a, view_b = _MatchResidueByAln(ent_a, ent_b, atoms, + ost.seq.alg.GlobalAlign) else: raise ValueError(not_supported) ## action if iterative: - res=ost.mol.alg.IterativeSuperposeSVD(view_a, view_b, max_iterations, distance_threshold) + res = ost.mol.alg.IterativeSuperposeSVD(view_a, view_b, max_iterations, + distance_threshold) else: - res=ost.mol.alg.SuperposeSVD(view_a, view_b) + res = ost.mol.alg.SuperposeSVD(view_a, view_b) return res diff --git a/modules/mol/base/doc/entity.rst b/modules/mol/base/doc/entity.rst index 849d37af8..6fcaf9834 
100644 --- a/modules/mol/base/doc/entity.rst +++ b/modules/mol/base/doc/entity.rst @@ -1582,7 +1582,8 @@ Other Entity-Related Functions :returns: :class:`EntityView` -.. function:: CreateEntityFromView(view, include_exlusive_atoms, handle=EntityHandle()) +.. function:: CreateEntityFromView(view, include_exlusive_atoms, \ + handle=EntityHandle()) This function behaves exactly like :meth:`EntityHandle.Copy`, except that only atoms, residues, chains and bonds that are present in the view will be @@ -1596,7 +1597,7 @@ Other Entity-Related Functions residues, chains, bonds and torsions will be added to handle. This is useful to combine several entities into one. - :returns :class:`EntityHandle` + :returns: :class:`EntityHandle` Residue Numbering diff --git a/modules/seq/alg/pymod/renumber.py b/modules/seq/alg/pymod/renumber.py index 899ad1e16..9f6dd02d5 100644 --- a/modules/seq/alg/pymod/renumber.py +++ b/modules/seq/alg/pymod/renumber.py @@ -44,7 +44,9 @@ def Renumber(seq_handle, sequence_number_with_attached_view=1): sequence and the full-length target sequence. The aligned model sequence or the alignment itself with an attached view needs to be provided. Upon succcess, the renumbered entity is returned. - If an alignment is given, the sequence must + If an alignment is given, the first sequence of the alignment is considered + the full-length sequence and it must match the model sequence wherever it is + aligned (i.e. excluding gaps). .. code-block:: python diff --git a/modules/seq/base/doc/seq.rst b/modules/seq/base/doc/seq.rst index f2e1edbf3..5c6c38411 100644 --- a/modules/seq/base/doc/seq.rst +++ b/modules/seq/base/doc/seq.rst @@ -19,7 +19,7 @@ Attaching Structures to Sequences As OpenStructure is a computational structural biology framework, it is not surprising that the sequence classes have been designed to work together with -structural data. Each sequence can have an attached :class:`~mol.EntityView` +structural data. 
Each sequence can have an attached :class:`~ost.mol.EntityView` allowing for fast mapping between residues in the entity view and position in the sequence. @@ -45,16 +45,17 @@ sequence offset is 0. Loading and Saving Sequences and Alignments -------------------------------------------------------------------------------- -The :mod:`io` module supports input and output of common sequence formats. -Single sequences can be loaded from disk with :func:`io.LoadSequence`, -alignments are loaded with :func:`io.LoadAlignment` and lists of sequences are loaded with :func:`io.LoadSequenceList`. In addition to the file based input -methods, sequences can also be loaded from a string: +The :mod:`~ost.io` module supports input and output of common sequence formats. +Single sequences can be loaded from disk with :func:`~ost.io.LoadSequence`, +alignments are loaded with :func:`~ost.io.LoadAlignment` and lists of sequences +are loaded with :func:`~ost.io.LoadSequenceList`. In addition to the file based +input methods, sequences can also be loaded from a string: .. code-block:: python - seq_string='''>sequence + seq_string = '''>sequence abcdefghiklmnop''' - s=io.SequenceFromString(seq_string, 'fasta') + s = io.SequenceFromString(seq_string, 'fasta') print s.name, s # will print "sequence abcdefghiklmnop" Note that, in that case specifying the format is mandatory. @@ -62,21 +63,24 @@ Note that, in that case specifying the format is mandatory. The SequenceHandle -------------------------------------------------------------------------------- -.. function:: CreateSequence(name, sequence) +.. function:: CreateSequence(name, sequence, role="UNKNOWN") Create a new :class:`SequenceHandle` with the given name and sequence. - :param name: name of the sequence - :type name: str + :param name: Name of the sequence + :type name: :class:`str` :param sequence: String of characters representing the sequence. Only 'word' characters (no digits), '?', '-' and '.' are allowed. 
In an upcoming release, '?' and '.' will also be forbidden so its best to translate those to 'X' or '-'. - :type sequence: str + :type sequence: :class:`str` + :param role: Role of the sequence (optional) + :type role: :class:`str` :raises InvalidSequence: When the sequence string contains forbidden characters. In the future, '?' and '.' will also raise this exception. .. class:: SequenceHandle + ConstSequenceHandle Represents a sequence. New instances are created with :func:`CreateSequence`. @@ -130,7 +134,7 @@ The SequenceHandle sequence, or, a sequence only consisting of hyphens, -1 is returned. .. method:: AttachView(view) - AttachView(view, [chain_name]) + AttachView(view, chain_name) Attach an :class:`~mol.EntityView` to sequence. The first signature requires that the view contains one chain. If not, an :exc:`IntegrityError` is @@ -176,6 +180,14 @@ The SequenceHandle .. method:: SetName() Set name of the sequence. Also available as the property :attr:`name`. + + .. method:: GetOneLetterCode(pos) + __getitem__(pos) + __getitem__(slice) + + :return: Character at position *pos* of sequence (also supports pythonic + slicing with [] operator) + :rtype: :class:`str` .. attribute:: gapless_string @@ -193,13 +205,29 @@ The SequenceHandle Shorthand for :meth:`GetOffset`/:meth:`SetOffset` + .. attribute:: role + + Role of this sequence. + + :type: :class:`str` + .. method:: __len__() - Returns the length of the sequence (including insertions and deletions) + :return: The length of the sequence (including insertions and deletions) .. method:: __str__() - Returns the sequence as a string. + :return: The sequence as a string. + +.. 
function:: SequenceFromChain(name, chain) + + :return: Sequence extracted from one letter codes in given *chain* with a + view to the chain attached to it + :rtype: :class:`SequenceHandle` + :param name: Name of the sequence + :type name: :class:`str` + :param chain: Chain from which to extract sequence + :type chain: :class:`~ost.mol.ChainHandle` / :class:`~ost.mol.ChainView` .. function:: Match(s1, s2) @@ -212,16 +240,57 @@ The SequenceHandle case-insensitive comparison of the two sequences. The character 'X' is interpreted as a wild card character that always matches the other sequence. -The SequenceList +The SequenceList -------------------------------------------------------------------------------- +.. function:: CreateSequenceList() + + Creates and returns a new :class:`SequenceList` with no sequences. + .. class:: SequenceList + ConstSequenceList + + Represents a list of sequences. The class provides a row-based interface. + + .. method:: GetCount() + __len__() + + :return: Number of sequences in the list. + :rtype: :class:`int` + + .. method:: AddSequence(sequence) + + Append a sequence to the list. + + .. method:: GetMinLength() + GetMaxLength() + + :return: Minimal / maximal length of the sequences in this list. + :rtype: :class:`int` + + .. method:: FindSequence(name) + + Find sequence with given *name*. If the alignment contains several sequences + with the same name, the first sequence is returned. + + .. method:: SequencesHaveEqualLength() + + :return: True if all sequences have same length. + + .. method:: Take(n) + + :return: First *n* (or last *-n* if *n* negative) sequences. - Represents a list of sequences. The class provides a row-based interface. New - instances are created with :func:`CreateSequenceList`. + .. method:: Slice(first, n) + :return: *n* sequences starting from *first*. -The AlignmentHandle + .. method:: __getitem__(key) + + :return: Access sequence(s) *key* (also supports pythonic slicing). 
+ + +The AlignmentHandle -------------------------------------------------------------------------------- The :class:`AlignmentHandle` represents a list of aligned sequences. In @@ -231,7 +300,7 @@ the same length. New instances of alignments are created with Typically sequence alignments are used column-based, i.e by looking at an aligned columns in the sequence alignment. To get a row-based (sequence) view -on the sequence list, use :meth:`GetSequences()`. +on the sequence list, use :meth:`~AlignmentHandle.GetSequences()`. All functions that operate on an alignment will again produce a valid alignment. This mean that it is not possible to change the length of one sequence, without @@ -266,34 +335,35 @@ an alignment: .. class:: AlignmentHandle - .. note:: - - Several of these methods just forward calls to the sequence. For more - detailed information, have a look at the :class:`SequenceHandle` - documentation. - .. method:: GetSequence(index) - Returns the sequence at the given index, raising an IndexError when trying - to access an inexistent sequence. + :return: Sequence at the given index, raising an IndexError when trying + to access an inexistent sequence. + :rtype: :class:`ConstSequenceHandle` .. method:: GetSequences() - Returns a list of all sequence of the alignment. + :return: List of all sequences of the alignment. Also available as + :attr:`sequences`. + :rtype: :class:`ConstSequenceList` .. method:: GetLength() + __len__() - Returns the length of the alignment. + :return: Length of the alignment. + :rtype: :class:`int` .. method:: GetCount() - Returns the number of sequences in the alignment. - + :return: Number of sequences in the alignment. Also available as + :attr:`sequence_count`. + :rtype: :class:`int` .. method:: ToString(width=80) - Returns a formatted string version of the alignment. + :return: Formatted string version of the alignment. 
The sequences are + split into smaller parts to fit into the number of columns specified. + :rtype: :class:`str` .. code-block:: python @@ -310,16 +380,13 @@ an alignment: .. method:: FindSequence(name) - Find sequence with given name. If the alignment contains several sequences - with the same name, the first sequence is returned. + :return: Sequence with given *name*. If the alignment contains several + sequences with the same name, the first sequence is returned. .. method:: SetSequenceName(seq_index, name) - Set the name of the sequence at index `seq_index` to name - - .. method:: SetSequenceOffset(seq_index, offset) - - Set the sequence offset of sequence at index `seq_index` + Set the name of the sequence at index `seq_index` to `name` + (see :attr:`SequenceHandle.name`). .. method:: Copy() @@ -327,18 +394,24 @@ an alignment: .. method:: GetPos(seq_index, res_index) - Get position of residue with index equal to `res_index` in sequence at index - `seq_index`. + :return: Position of residue with index equal to `res_index` in sequence at + index `seq_index` (see :meth:`SequenceHandle.GetPos`) .. method:: GetResidueIndex(seq_index, pos) - Get residue index of residue at position `pos` in sequence at index - `seq_index`. + :return: Residue index of residue at position `pos` in sequence at index + `seq_index` (see :meth:`SequenceHandle.GetResidueIndex`) + + .. method:: GetResidue(seq_index, pos) + + :return: Attached residue at position `pos` in sequence at index `seq_index` + (see :meth:`SequenceHandle.GetResidue`). .. method:: AttachView(seq_index, view) AttachView(seq_index, view, chain_name) - Attach the given view to the sequence at index `seq_index`. + Attach the given view to the sequence at index `seq_index` + (see :meth:`SequenceHandle.AttachView`). .. method:: Cut(start, end) @@ -396,7 +469,8 @@ an alignment: .. method:: GetSequenceOffset(index) SetSequenceOffset(index, offset) - Get/set the offset for sequence at *index*. 
+ + Get/set the offset for sequence at *index* + (see :attr:`SequenceHandle.offset`). :param index: The index of the sequence :type index: :class:`int` @@ -407,7 +481,8 @@ an alignment: .. method:: GetSequenceRole(index) SetSequenceRole(index, role) - Get/Set the sequence role for sequence at *index*. + Get/Set the sequence role for sequence at *index* + (see :attr:`SequenceHandle.role`). :param index: The index of the sequence :type index: :class:`int` @@ -426,6 +501,109 @@ an alignment: .. method:: RemoveSequence(index) Remove sequence at *index* from the alignment. + + .. attribute:: sequences + + Shorthand for :meth:`GetSequences` + + .. attribute:: sequence_count + + Shorthand for :meth:`GetCount` + + .. method:: __getitem__(pos) + + :return: Column at position *pos* of alignment. + :rtype: :class:`AlignedColumn` + + .. method:: __getitem__(slice) + + :return: Columns defined by pythonic slicing. + :rtype: :class:`AlignedRegion` + + +.. class:: AlignedRegion + + Represents a slice of an :class:`AlignmentHandle`. + + .. method:: GetAlignmentHandle() + + :return: Alignment from which this region was sliced. + :rtype: :class:`AlignmentHandle` + + .. method:: GetLength() + __len__() + + :return: Number of columns in the slice. + + .. method:: __getitem__(pos) + + :return: Column at position *pos* within this slice. + :rtype: :class:`AlignedColumn` + + .. attribute:: start + + Starting position in alignment. + + .. attribute:: end + + One after end position in alignment. + + +.. class:: AlignedColumn + + .. method:: GetIndex() + + :return: Position in alignment. + + .. method:: GetRowCount() + + :return: Number of rows in the column. + + .. method:: GetResidue(row) + + :return: Attached residue for sequence at given *row* of this column + (see :meth:`AlignmentHandle.GetResidue`). + + .. method:: __getitem__(row) + + :return: Character at given *row* of this column. + :rtype: :class:`str` + + .. method:: __str__() + + :return: String representation of column in alignment. 
+ + +Extracting views from sequences +-------------------------------------------------------------------------------- + +.. function:: ViewsFromSequences(seq1, seq2) + + Returns a tuple of entity views containing only the atoms of the aligned + residues. The order of residues in the two views is guaranteed to be the same + but the order of atoms within each residue may differ. If the order of atoms + is crucial (e.g. for :func:`~ost.mol.alg.SuperposeSVD`) either prefilter the + attached views to include only one atom per residue or use the slower (approx. + 50% more runtime) :meth:`AlignmentHandle.GetMatchingBackboneViews`. + + :return: Pair of views including all the aligned residues of the two given + sequences. + :rtype: :class:`tuple` with two :class:`~ost.mol.EntityView` + + :raises: :class:`Exception` if sequence lengths do not match or if any of the + sequences is lacking an attached view. + +.. function:: ViewsFromAlignment(aln, index1=0, index2=1) + + :return: Pair of views as in :func:`ViewsFromSequences`. + :rtype: :class:`tuple` with two :class:`~ost.mol.EntityView` + + :param aln: Alignment from which to extract sequences. + :type aln: :class:`AlignmentHandle` + :param index1: Index of first sequence in *aln* to use. + :type index1: :class:`int` + :param index2: Index of second sequence in *aln* to use. + :type index2: :class:`int` Handling Sequence Profiles -- GitLab