diff --git a/modules/index.rst b/modules/index.rst index b2f5bc3fb85ae35f058f08ee1213537d1dfe896f..307bbce6c93821272ccc07399e87c8256f603ce6 100644 --- a/modules/index.rst +++ b/modules/index.rst @@ -16,6 +16,7 @@ OpenStructure documentation img/base/img img/alg/alg seq/base/seq + seq/alg/seqalg io/io gfx/gfx gui/gui @@ -56,7 +57,7 @@ Density Maps and Images Sequences and Alignments -------------------------------------------------------------------------------- -**Overview**: :doc:`sequence module <seq/base/seq>` +**Overview**: :doc:`sequence module <seq/base/seq>` | :doc:`sequence algorithms <seq/alg/seqalg>` **Input/Output**: :ref:`loading and saving sequences <seq-io>` diff --git a/modules/seq/alg/doc/seqalg.rst b/modules/seq/alg/doc/seqalg.rst new file mode 100644 index 0000000000000000000000000000000000000000..ca6476b663e7ccfe637bf083ed31ef63cdb3d383 --- /dev/null +++ b/modules/seq/alg/doc/seqalg.rst @@ -0,0 +1,33 @@ +:mod:`mol.alg <ost.seq.alg>` -- Algorithms for Sequences +================================================================================ + +.. currentmodule:: ost.seq.alg + + +.. function:: MergePairwiseAlignments(pairwise_alns, ref_seq) + + Merge a list of pairwise alignments into a multiple sequence alignments. This + function uses the reference sequence as the anchor and inserts gaps where + needed. This is also known as the *star method*. + + The resulting multiple sequence alignment provides a simple way to map between + residues of pairwise alignments, e.g. to compare distances in two structural + templates. + + There are a few things to keep in mind when using this function: + + - The reference sequence mustn't contain any gaps + + - The first sequence of each pairwise alignments corresponds to the reference + sequence. Apart from the presence of gaps, these two sequences must be + completely identical. + + - The resulting multiple sequence alignment is by no means optimal. For + better results, consider using a multiple-sequence alignment program such + as MUSCLE or ClustalW. + + - Residues in columns where the reference sequence has gaps should not be + considered as aligned. There is no information in the pairwise alignment to + guide the merging, the result is undefined. + +.. autofunction:: AlignToSEQRES \ No newline at end of file diff --git a/modules/seq/alg/pymod/__init__.py b/modules/seq/alg/pymod/__init__.py index 5d4730f17b92b7eed13cf47fe02d802e3267364b..bb1ea4e829b6a06e7d0240b227d6ae467aed9cff 100644 --- a/modules/seq/alg/pymod/__init__.py +++ b/modules/seq/alg/pymod/__init__.py @@ -1,5 +1,44 @@ from _seq_alg import * from ost.seq.alg.mat import * +def AlignToSEQRES(chain, seqres): + """ + Aligns the residues of chain to the SEQRES sequence, inserting gaps where + needed. The function uses the connectivity of the protein backbone to find + consecutive peptide fragments. These fragments are then aligned to the SEQRES + sequence. + + All the non-ligand, peptide-linking residues of the chain must be listed in + SEQRES. If there are any additional residues in the chain, the function raises + a ValueError. + + :returns: The alignment of the residues in the chain and the SEQRES entries. + :rtype: :class:`~ost.seq.AlignmentHandle` + """ + from ost import seq + from ost import mol + residues=chain.residues + if len(residues)==0: + return None + fragments=[residues[0].one_letter_code] + for r1, r2 in zip(residues[:-2], residues[1:]): + if not r2.IsPeptideLinking() or r2.IsLigand(): + continue + if not mol.InSequence(r1, r2): + fragments.append('') + fragments[-1]+=r2.one_letter_code + ss=str(seqres) + pos=0 + aln_seq='' + for frag in fragments: + new_pos=ss.find(frag, pos) + if new_pos==-1: + raise ValueError('"%s" is not a substring of "%s"' % (frag, ss)) + aln_seq+='-'*(new_pos-pos)+frag + pos=new_pos+len(frag) + aln_seq+='-'*(len(seqres)-len(aln_seq)) + return seq.CreateAlignment(seq.CreateSequence('SEQRES', str(seqres)), + seq.CreateSequence('atoms', aln_seq)) + \ No newline at end of file