diff --git a/modelling/doc/pipeline.rst b/modelling/doc/pipeline.rst index bd114c4575734a8a2ecbbf73c0699e19e01334e1..3dd2f3128c8f90265f201114ca546b48f59b36a0 100644 --- a/modelling/doc/pipeline.rst +++ b/modelling/doc/pipeline.rst @@ -176,77 +176,9 @@ Build Raw Modelling Handle :return: A deep copy of the current handle :rtype: :class:`ModellingHandle` + - -.. function:: BuildRawModel(aln, include_ligands=False, chain_names=\ - "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz",\ - spdbv_style=False) - - Builds a raw (pseudo) model from the alignment. Can either take a single - alignment handle or an alignment handle list. Every list item is treated as a - single chain in the final raw model. - - Each alignment handle must contain exactly two sequences and the second - sequence is considered the template sequence, which must have a - :class:`~ost.mol.EntityView` attached. - - This is a basic protein core modelling algorithm that copies backbone - coordinates based on the sequence alignment. For matching residues, the - side chain coordinates are also copied. Gaps are ignored. Hydrogen an - deuterium atoms are not copied into the model. - - The function tries to reuse as much as possible from the template. Modified - residues are treated as follows: - - - Selenium methionine residues are converted to methionine - - - Side chains which contain all atoms of the parent amino acid, e.g. - phosphoserine are copied as a whole with the modifications stripped off. - - Residues with missing backbone atoms and D-peptides are generally skipped and - treated as gaps. Missing Cbeta atoms in backbone are ok and reconstructed. - If all residues are skipped (e.g. Calpha traces), we report an error and - return an empty model. - - Residue numbers are set such that missing residue in gaps are honoured and - subsequent loop modelling can insert new residues without having to renumber. - **The numbering of residues starts for every chain with the value 1**. - - The returned :class:`ModellingHandle` stores the obtained raw model as well - as information about insertions and deletions in the gaps list. - - :param aln: Single alignment handle for raw model with single chain or - list of alignment handles for raw model with multiple chains. - :type aln: :class:`~ost.seq.AlignmentHandle` / :class:`~ost.seq.AlignmentList` - - :param include_ligands: True, if we wish to include ligands in the model. This - searches for ligands in all OST handles of the views - attached to the alignments. Ligands are identified - with the `ligand` property in the handle (set by OST - based on HET records) or by the chain name '_' (as set - in SMTL). All ligands are added to a new chain named - '_'. - :type include_ligands: :class:`bool` - - :param chain_names: Chains are named by a single chanacter taken from this. - :type chain_names: :class:`str` - - :param spdbv_style: True, if we need a model in the old SPDBV style. - :type spdbv_style: :class:`bool` - - :return: Raw (pseudo) model from the alignment. - :rtype: :class:`ModellingHandle` - - :raises: A :exc:`RuntimeError` when: - - - the alignments do not have two sequences - - the second sequence does not have an attached structure - - the residues of the template structure do not match with the - alignment sequence (note that you can set an "offset" (see - :meth:`~ost.seq.AlignmentHandle.SetSequenceOffset`) for the - template sequence (but not for the target)) - - the target sequence has a non-zero offset (cannot be honored as - the resulting model will always start its residue numbering at 1) +.. autofunction:: BuildRawModel The Default Pipeline -------------------------------------------------------------------------------- diff --git a/modelling/pymod/CMakeLists.txt b/modelling/pymod/CMakeLists.txt index d000ba36e7b68adea62bb41ce064ca9b0b03b901..58e2665c98f6d6ac1e81901a956547ddaa3a2836 100644 --- a/modelling/pymod/CMakeLists.txt +++ b/modelling/pymod/CMakeLists.txt @@ -26,6 +26,7 @@ set(MODELLING_PYMOD _monte_carlo.py _mhandle_helper.py _alignment_fiddling.py + _raw_model.py ) pymod(NAME modelling diff --git a/modelling/pymod/__init__.py b/modelling/pymod/__init__.py index 54749fcabdaf32fe3864307ec7ae2ce7a8320a5f..6cf9be125390e77c9307fdff5c346c56a2f540c0 100644 --- a/modelling/pymod/__init__.py +++ b/modelling/pymod/__init__.py @@ -26,3 +26,4 @@ from ._denovo import * from ._fragger_handle import * from ._monte_carlo import * from ._mhandle_helper import * +from ._raw_model import * diff --git a/modelling/pymod/_raw_model.py b/modelling/pymod/_raw_model.py new file mode 100644 index 0000000000000000000000000000000000000000..dd2b26e78c31cc7dc659e652410a2d8e45ac71e6 --- /dev/null +++ b/modelling/pymod/_raw_model.py @@ -0,0 +1,132 @@ +# Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and +# Biozentrum - University of Basel +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# internal +from ._modelling import * +# external +import ost + +def BuildRawModel(aln, chain_names = None, include_ligands = False, + spdbv_style = False): + '''Builds a raw (pseudo) model from the alignment. Can either take a single + alignment handle or an alignment handle list. Every list item is treated as a + single chain in the final raw model. + + Each alignment handle must contain exactly two sequences and the second + sequence is considered the template sequence, which must have a + :class:`~ost.mol.EntityView` attached. + + This is a basic protein core modelling algorithm that copies backbone + coordinates based on the sequence alignment. For matching residues, the + side chain coordinates are also copied. Gaps are ignored. Hydrogen an + deuterium atoms are not copied into the model. + + The function tries to reuse as much as possible from the template. Modified + residues are treated as follows: + + - Selenium methionine residues are converted to methionine + + - Side chains which contain all atoms of the parent amino acid, e.g. + phosphoserine are copied as a whole with the modifications stripped off. + + Residues with missing backbone atoms and D-peptides are generally skipped and + treated as gaps. Missing Cbeta atoms in backbone are ok and reconstructed. + If all residues are skipped (e.g. Calpha traces), we report an error and + return an empty model. + + Residue numbers are set such that missing residue in gaps are honoured and + subsequent loop modelling can insert new residues without having to renumber. + **The numbering of residues starts for every chain with the value 1**. + + The returned :class:`ModellingHandle` stores the obtained raw model as well + as information about insertions and deletions in the gaps list. + + :param aln: Single alignment handle for raw model with single chain or + list of alignment handles for raw model with multiple chains. + :type aln: :class:`~ost.seq.AlignmentHandle` / :class:`~ost.seq.AlignmentList` + + :param include_ligands: True, if we wish to include ligands in the model. This + searches for ligands in all OST handles of the views + attached to the alignments. Ligands are identified + with the `ligand` property in the handle (set by OST + based on HET records) or by the chain name '_' (as set + in SMTL). All ligands are added to a new chain named + '_'. + :type include_ligands: :class:`bool` + + :param chain_names: If set, this overrides the default chain naming + (chains are consecutively named according to + characters in + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz'). + If *aln* is of type :class:`ost.seq.AlignmentHandle`, + *chain_names* is expected to be a :class:`str`. + If *aln* is of type :class:`ost.seq.AlignmentList`, + chain_names is expected to be a :class:`list` of + :class:`str` of same size as *aln*. + :type chain_names: :class:`str` / :class:`list` + + :param spdbv_style: True, if we need a model in the old SPDBV style. + :type spdbv_style: :class:`bool` + + :return: Raw (pseudo) model from the alignment. + :rtype: :class:`ModellingHandle` + + :raises: A :exc:`RuntimeError` when: + + - the alignments do not have two sequences + - the second sequence does not have an attached structure + - the residues of the template structure do not match with the + alignment sequence (note that you can set an "offset" (see + :meth:`~ost.seq.AlignmentHandle.SetSequenceOffset`) for the + template sequence (but not for the target)) + - the target sequence has a non-zero offset (cannot be honored as + the resulting model will always start its residue numbering at 1) + ''' + + aln_list = None + name_list = None + + if isinstance(aln, ost.seq.AlignmentHandle): + aln_list = ost.seq.AlignmentList() + aln_list.append(aln) + if chain_names is None: + name_list = ['A'] + elif isinstance(chain_names, str): + name_list = [chain_names] + else: + raise RuntimeError('Expect chain_names to be of type str if aln is'\ + ' of type ost.seq.AlignmentHandle') + elif isinstance(aln, ost.seq.AlignmentList): + aln_list = aln + if chain_names is None: + def_names = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopq'\ + 'rstuvwxyz' + if len(aln_list) > len(def_names): + raise RuntimeError('Max of ' + str(len(def_names)) + \ + ' alns if no chain_names provided') + name_list = [str(c) for c in def_names[:len(aln_list)]] + elif isinstance(chain_names, list): + if len(chain_names) != len(aln_list): + raise RuntimeError('Number of alns and chain_names must be '\ + 'consistent') + name_list = chain_names + else: + raise RuntimeError('chain_names must be list if aln is of type '\ + 'ost.seq.AlignmentList') + else: + raise RuntimeError('aln must be of type ost.seq.AlignmentHandle or '\ + 'ost.seq.AlignmentList') + + return MHandleFromAln(aln_list, name_list, include_ligands, spdbv_style) diff --git a/modelling/pymod/export_model.cc b/modelling/pymod/export_model.cc index a4a0670c287dc43bf46d882600010bb9fdf9a8dd..b8de2bb66947b22ed8ca5c03149b19efeb5b751f 100644 --- a/modelling/pymod/export_model.cc +++ b/modelling/pymod/export_model.cc @@ -22,15 +22,18 @@ using namespace boost::python; using namespace promod3::modelling; -ModellingHandle (*BuildRawModelHandle)(const ost::seq::AlignmentHandle&, - bool, const String&, bool) - = &BuildRawModel; -ModellingHandle (*BuildRawModelHandleList)(const ost::seq::AlignmentList&, - bool, const String&, bool) - = &BuildRawModel; namespace { +ModellingHandle WrapMHandleFromAln(const ost::seq::AlignmentList& aln, + const boost::python::list& chain_names, + bool include_ligands, + bool spdbv_style) { + std::vector<String> v_chain_names; + promod3::core::ConvertListToVector(chain_names, v_chain_names); + return MHandleFromAln(aln, v_chain_names, include_ligands, spdbv_style); +} + int WrapCountEnclosedGaps(ModellingHandle& mhandle, const StructuralGap& gap) { return CountEnclosedGaps(mhandle, gap, false); } @@ -203,14 +206,7 @@ void export_model() (arg("mhandle"), arg("bb_list"), arg("start_resnum"), arg("chain_idx"))); def("InsertLoopClearGaps", InsertLoopClearGaps, (arg("mhandle"), arg("bb_list"), arg("gap"))); - def("BuildRawModel", BuildRawModelHandle, - (arg("aln"), - arg("include_ligands")=false, - arg("chain_names")="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", - arg("spdbv_style")=false)); - def("BuildRawModel", BuildRawModelHandleList, - (arg("aln"), - arg("include_ligands")=false, - arg("chain_names")="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", - arg("spdbv_style")=false)); + def("MHandleFromAln", &WrapMHandleFromAln, (arg("aln"), arg("chain_names"), + arg("include_ligands")=false, + arg("spdbv_style")=false)); } diff --git a/modelling/src/model.cc b/modelling/src/model.cc index 2eafa9df60de5b57606242400f8ab632bc160d28..5d27cde5f6471f819c9bfa6fc326639a9a9aeb71 100644 --- a/modelling/src/model.cc +++ b/modelling/src/model.cc @@ -853,45 +853,29 @@ bool InsertDummyAtoms(ResidueHandle dst_res, XCSEditor& edi) { return true; } -ModellingHandle BuildRawModel(const seq::AlignmentHandle& aln, - bool include_ligands, - const String& chain_names, - bool spdbv_style) -{ - seq::AlignmentList l; - l.push_back(aln); - return BuildRawModel(l, - include_ligands, - chain_names, - spdbv_style); -} - -ModellingHandle BuildRawModel(const seq::AlignmentList& aln, - bool include_ligands, - const String& chain_names, - bool spdbv_style) +ModellingHandle MHandleFromAln(const seq::AlignmentList& aln, + const std::vector<String>& chain_names, + bool include_ligands, + bool spdbv_style) { core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped( - "modelling::BuildRawModel", 2); + "modelling::MHandleFromAln", 2); + + if(aln.size() != chain_names.size()) { + throw promod3::Error("Require exactly one chain name per alignment"); + } ModellingHandle result; EntityHandle model=CreateEntity(); XCSEditor edi=model.EditXCS(BUFFERED_EDIT); result.model=model; - uint chain_name_idx = 0; - - for(seq::AlignmentList::const_iterator it=aln.begin(); - it!=aln.end(); ++it, ++chain_name_idx) { - if(chain_name_idx >= chain_names.size()) { - throw promod3::Error("running out of chain names"); - } + for(uint i = 0; i < aln.size(); ++i) { StructuralGapList gap_list; - BuildRawChain(*it, edi, gap_list, chain_names[chain_name_idx], spdbv_style); + AddRawChain(aln[i], edi, gap_list, chain_names[i], spdbv_style); result.gaps.insert(result.gaps.end(), gap_list.begin(), gap_list.end()); - seq::SequenceHandle seqres = seq::CreateSequence( - chain_names.substr(chain_name_idx, 1), - it->GetSequence(0).GetGaplessString()); + seq::SequenceHandle seqres = seq::CreateSequence(chain_names[i], + aln[i].GetSequence(0).GetGaplessString()); result.seqres.AddSequence(seqres); } @@ -977,11 +961,11 @@ void AddLigands(const seq::AlignmentList& aln, XCSEditor& edi, } } -void BuildRawChain(const seq::AlignmentHandle& aln, - XCSEditor& edi, - StructuralGapList& gap_list, - char chain_name, - bool spdbv_style) +void AddRawChain(const seq::AlignmentHandle& aln, + XCSEditor& edi, + StructuralGapList& gap_list, + const String& chain_name, + bool spdbv_style) { core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped( @@ -997,7 +981,7 @@ void BuildRawChain(const seq::AlignmentHandle& aln, throw promod3::Error("we cannot honor non-zero offsets for targets"); } - ChainHandle chain=edi.InsertChain(String(1,chain_name)); + ChainHandle chain=edi.InsertChain(chain_name); // we enforce res. numbering for target to start at 1 int res_num = 0; diff --git a/modelling/src/model.hh b/modelling/src/model.hh index a0facde15bd65f2bdbe980afd4ec249872815aad..df1df9d6f08c59f1d1e1111b94ef1207e20a3c0f 100644 --- a/modelling/src/model.hh +++ b/modelling/src/model.hh @@ -160,9 +160,10 @@ bool CopyModified(ost::mol::ResidueView src_res, bool& has_cbeta); -/// \brief Build raw model from alignment. +/// \brief Build raw model from list of alignments. /// -/// The second sequence of aln must have a view attached. +/// Every item in the list represents a target chain and the second sequence of +/// every aln must have a view attached. /// /// Basic protein core modeling algorithm that copies backbone coordinates based /// on the sequence alignment. For matching residues, the sidechain coordinates @@ -183,31 +184,19 @@ bool CopyModified(ost::mol::ResidueView src_res, /// Conopology::Instance().SetDefaultLib) as is done by default for /// Python imports (see core __init__.py) ModellingHandle -BuildRawModel(const ost::seq::AlignmentHandle& aln, - bool include_ligands=false, - const String& chain_names="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefgh" - "ijklmnopqrstuvwxyz", - bool spdbv_style=false); - -/// \brief Build raw model from alignment list. -/// -/// Every item in the list represents one target chain and the -/// second sequence must have a view attached. See above. -ModellingHandle -BuildRawModel(const ost::seq::AlignmentList& aln, - bool include_ligands=false, - const String& chain_names="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefgh" - "ijklmnopqrstuvwxyz", - bool spdbv_style=false); - -/// \brief get called by BuildRawModel, creates one single chain. -void BuildRawChain(const ost::seq::AlignmentHandle& aln, - ost::mol::XCSEditor& edi, - StructuralGapList& gap_list, - char chain_name, - bool spdbv_style=false); - -/// \brief handles ligands and gets called by BuildRawModel if add_ligand flag +MHandleFromAln(const ost::seq::AlignmentList& aln, + const std::vector<String>& chain_names, + bool include_ligands=false, + bool spdbv_style=false); + +/// \brief get called by MHandleFromAln, adds one single chain. +void AddRawChain(const ost::seq::AlignmentHandle& aln, + ost::mol::XCSEditor& edi, + StructuralGapList& gap_list, + const String& chain_name, + bool spdbv_style=false); + +/// \brief handles ligands and gets called by MHandleFromAln if add_ligand flag /// is true. void AddLigands(const ost::seq::AlignmentList& aln, ost::mol::XCSEditor& edi, diff --git a/modelling/tests/test_modelling.py b/modelling/tests/test_modelling.py index 0cd6b31046f3bf433cfd5cdcdb76ee4790f95c75..382b103546382feab0f3adb144240c905846e69e 100644 --- a/modelling/tests/test_modelling.py +++ b/modelling/tests/test_modelling.py @@ -60,6 +60,40 @@ class ModellingTests(unittest.TestCase): seq.CreateSequence('B', 'ac-ef')) self.assertRaises(RuntimeError, modelling.BuildRawModel, aln) + def testRaiseInvalidChainNames(self): + tpl = io.LoadPDB('data/gly.pdb') + aln = io.LoadAlignment('data/seq.fasta') + aln.AttachView(1, tpl.CreateFullView()) + + # perform stuff that doesn't raise, i.e. do it correctly + result = modelling.BuildRawModel(aln) + result = modelling.BuildRawModel(aln, chain_names='cheese') + self.assertEqual(result.model.chains[0].GetName(), 'cheese') + aln_lst = seq.AlignmentList() + aln_lst.append(aln) + aln_lst.append(aln) + result = modelling.BuildRawModel(aln_lst) + result = modelling.BuildRawModel(aln_lst, ['cheese', 'steak']) + self.assertEqual(result.model.chains[0].GetName(), 'cheese') + self.assertEqual(result.model.chains[1].GetName(), 'steak') + + # we only accept a string as chain_names if aln is AlignmentHandle + self.assertRaises(RuntimeError, modelling.BuildRawModel, aln, 1) + self.assertRaises(RuntimeError, modelling.BuildRawModel, aln, ['A']) + + # we only accept a list as chain_names if aln is AlignmentList + self.assertRaises(RuntimeError, modelling.BuildRawModel, aln_lst, 1) + self.assertRaises(RuntimeError, modelling.BuildRawModel, aln_lst, 'A') + # size also matters... + self.assertRaises(RuntimeError, modelling.BuildRawModel, aln_lst, ['A']) + + # increase size of aln_list => at some point we should run out of + # default chain_names + aln_lst = 100*[aln] + self.assertRaises(RuntimeError, modelling.BuildRawModel, aln_lst) + + + def testModeledSequence(self): # test if the model has the sequence we want. tpl = io.LoadPDB('data/gly.pdb')