From 4e4e86165764aa6879a212be91f32e8c20912b29 Mon Sep 17 00:00:00 2001 From: Gerardo Tauriello <gerardo.tauriello@unibas.ch> Date: Wed, 3 Feb 2016 17:58:05 +0100 Subject: [PATCH] Modelling update: tests, ClearGaps rewrite, doc --- modelling/doc/index.rst | 6 +- modelling/pymod/_closegaps.py | 16 ++- modelling/pymod/_pipeline.py | 5 +- modelling/src/gap.cc | 8 +- modelling/src/model.cc | 179 +++++--------------------- modelling/src/model.hh | 17 +-- modelling/tests/test_gap_extension.py | 32 +++++ modelling/tests/test_modelling.py | 151 ++++++++++++++++++++++ 8 files changed, 243 insertions(+), 171 deletions(-) diff --git a/modelling/doc/index.rst b/modelling/doc/index.rst index 60b99d64..a0e0a7b5 100644 --- a/modelling/doc/index.rst +++ b/modelling/doc/index.rst @@ -116,7 +116,7 @@ Closing Gaps :type gap: :class:`StructuralGap` :return: Index of next gap in mhandle.gaps after removal. - Returns -1 if last gap was removed. + Returns -1 if last gap was removed or no gaps in *mhandle*. :rtype: :class:`int` :raises: A :exc:`RuntimeError` if any gap in mhandle.gaps is only partially @@ -124,7 +124,9 @@ Closing Gaps .. function:: MergeGaps(mhandle, index) - Merges two gaps mhandle.gaps[index] and mhandle.gaps[index+1]. + Merges two gaps `mhandle.gaps[index]` and `mhandle.gaps[index+1]`. + The residues in between the gaps are removed from `mhandle.model` and added + to the new `mhandle.gaps[index]`. :param mhandle: Modelling handle on which to apply change. :type mhandle: :class:`ModellingHandle` diff --git a/modelling/pymod/_closegaps.py b/modelling/pymod/_closegaps.py index 2ef87d52..c46c126b 100644 --- a/modelling/pymod/_closegaps.py +++ b/modelling/pymod/_closegaps.py @@ -13,7 +13,8 @@ import sys # helper functions def _InitGapExtenders(mhandle, use_scoring_extender): # adds field extender_penalties to mhandle - if use_scoring_extender and not hasattr(mhandle, 'extender_penalties'): + # recomputed at each call (as old behavior) + if use_scoring_extender: # and not hasattr(mhandle, 'extender_penalties'): mhandle.extender_penalties = list() for i in range(len(mhandle.model.chains)): mhandle.extender_penalties.append([0.0] * len(mhandle.seqres[i])) @@ -25,6 +26,11 @@ def _InitGapExtenders(mhandle, use_scoring_extender): def _GetGapExtender(mhandle, actual_gap, actual_chain_idx, use_scoring_extender, max_length=-1): + + # TODO (GT): try removing _InitGapExtenders and recomputing + # extender_penalties (changes as gaps filled!!) + # -> or only once and for all? (see commented hasattr above) + # return appropriate gap extender if use_scoring_extender: return modelling.ScoringGapExtender(actual_gap, 0.8, @@ -774,6 +780,12 @@ def CloseLargeDeletions(mhandle, scorer, structure_db, linker_length = 8, if not actual_extender.Extend(): break + # FAIL (Fragger needs at least 3 residues) + if len(actual_gap.full_seq) < 3: + ost.LogInfo("Failed in CloseLargeDeletions (%s)" % str(gap_orig)) + gap_idx += 1 + continue + # extract gap info n_res = actual_gap.before c_res = actual_gap.after @@ -869,7 +881,7 @@ def CloseLargeDeletions(mhandle, scorer, structure_db, linker_length = 8, # update scorer scorer.SetEnvironment(bb_list, first_num, actual_chain_idx) # will return -1 if last gap removed - gap_idx = modelling.ClearGaps(mhandle, gap_orig) + gap_idx = modelling.ClearGaps(mhandle, actual_gap) ost.LogInfo("Resolved %s by sampling %s as linker" % \ (str(gap_orig), str(actual_gap))) diff --git a/modelling/pymod/_pipeline.py b/modelling/pymod/_pipeline.py index f6d3dbcd..1f493b8e 100644 --- a/modelling/pymod/_pipeline.py +++ b/modelling/pymod/_pipeline.py @@ -180,9 +180,8 @@ def BuildFromRawModel(mhandle): _closegaps.FillLoopsByMonteCarlo(mhandle, scorer, torsion_sampler) if len(mhandle.gaps) > 0: - msg = "There are still unsolved gaps, possibly large deletions. " - msg += "Try to resolve them by sampling a linker region." - ost.LogInfo(msg) + ost.LogInfo("There are still unsolved gaps, possibly large deletions. " + "Try to resolve them by sampling a linker region.") _closegaps.CloseLargeDeletions(mhandle, scorer, structure_db) # check if we succeeded diff --git a/modelling/src/gap.cc b/modelling/src/gap.cc index 5a7016ee..9e7022af 100644 --- a/modelling/src/gap.cc +++ b/modelling/src/gap.cc @@ -11,22 +11,18 @@ String StructuralGap::GetChainName() const { if (this->IsCTerminal()) { return before.GetChain().GetName(); - } - if (this->IsNTerminal()) { + } else { return after.GetChain().GetName(); } - return before.GetChain().GetName(); } ost::mol::ChainHandle StructuralGap::GetChain() const { if (this->IsCTerminal()) { return before.GetChain(); - } - if (this->IsNTerminal()) { + } else { return after.GetChain(); } - return before.GetChain(); } uint StructuralGap::GetChainIndex() const diff --git a/modelling/src/model.cc b/modelling/src/model.cc index 52900d8e..c0965445 100644 --- a/modelling/src/model.cc +++ b/modelling/src/model.cc @@ -49,160 +49,51 @@ bool CheckBackboneAtoms(ResidueView res) } int ClearGaps(ModellingHandle& mhandle, const StructuralGap& gap) { - - String chain_name = gap.GetChainName(); + + // return -1 if no more gaps int next_gap = -1; - - if(gap.IsNTerminal()){ - for(StructuralGapList::iterator i = mhandle.gaps.begin(); i != mhandle.gaps.end();){ - if(i->GetChainName() != chain_name){ - ++i; - continue; - } - if(i->IsCTerminal()){ - if(gap.after.GetNumber() > i->before.GetNumber()){ - if(gap.after.GetNumber() < i->before.GetNumber() + i->GetLength()){ - std::stringstream ss; - ss << "Tried to clear gap "<<*i<<" but"<<gap; - ss << " is only overlapping not fully enclosing!"; - throw promod3::Error(ss.str()); - } - i = mhandle.gaps.erase(i); - next_gap = i-mhandle.gaps.begin(); - continue; - } - }else if(i->IsNTerminal()){ - if(i->after.GetNumber() <= gap.after.GetNumber()){ - i = mhandle.gaps.erase(i); - next_gap = i-mhandle.gaps.begin(); - continue; - } - }else{ - if(gap.after.GetNumber() > i->before.GetNumber()){ - if(gap.after.GetNumber() < i->after.GetNumber()){ - std::stringstream ss; - ss << "Tried to clear gap "<<*i<<" but"<<gap; - ss << " is only overlapping not fully enclosing!"; - throw promod3::Error(ss.str()); - } - } - i = mhandle.gaps.erase(i); - next_gap = i-mhandle.gaps.begin(); - continue; - } + + // extract info for this gap + String my_chain_name = gap.GetChainName(); + ResNum my_b_res(0); + if (!gap.IsNTerminal()) my_b_res = gap.before.GetNumber(); + ResNum my_a_res = my_b_res + gap.GetLength() + 1; + + for (StructuralGapList::iterator i = mhandle.gaps.begin(); + i != mhandle.gaps.end(); ) { + + // skip different chains + if (i->GetChainName() != my_chain_name) { ++i; + continue; } - return next_gap; - } - if(gap.IsCTerminal()){ - for(StructuralGapList::iterator i = mhandle.gaps.begin(); i != mhandle.gaps.end();){ - if(i->GetChainName() != chain_name){ - ++i; - continue; - } - if(i->IsNTerminal()){ - if(gap.before.GetNumber() < i->after.GetNumber()){ - if(gap.before.GetNumber() > i->after.GetNumber() - i->GetLength()){ - std::stringstream ss; - ss << "Tried to clear gap "<<*i<<" but"<<gap; - ss << " is only overlapping not fully enclosing!"; - throw promod3::Error(ss.str()); - } - i = mhandle.gaps.erase(i); - next_gap = i-mhandle.gaps.begin(); - continue; - } - }else if(i->IsCTerminal()){ - if(i->before.GetNumber() >= gap.before.GetNumber()){ - i = mhandle.gaps.erase(i); - next_gap = i-mhandle.gaps.begin(); - continue; - } - }else{ - if(gap.before.GetNumber() < i->after.GetNumber()){ - if(gap.before.GetNumber() > i->before.GetNumber()){ - std::stringstream ss; - ss << "Tried to clear gap "<<*i<<" but"<<gap; - ss << " is only overlapping not fully enclosing!"; - throw promod3::Error(ss.str()); - } - } - i = mhandle.gaps.erase(i); - next_gap = i-mhandle.gaps.begin(); - continue; - } - ++i; - } - return next_gap; - } + // get res_nums + ResNum b_res(0); + if (!i->IsNTerminal()) b_res = i->before.GetNumber(); + ResNum a_res = b_res + i->GetLength() + 1; - for(StructuralGapList::iterator i = mhandle.gaps.begin(); i != mhandle.gaps.end();){ - if(i->GetChainName() != chain_name){ - ++i; - continue; - } - if(i->IsNTerminal()){ - if(gap.before.GetNumber() < i->after.GetNumber()){ - if(gap.after.GetNumber() < i->after.GetNumber()){ - std::stringstream ss; - ss << "Tried to clear gap "<<*i<<" but"<<gap; - ss << " is only overlapping not fully enclosing!"; - throw promod3::Error(ss.str()); - } - if(gap.before.GetNumber() > i->after.GetNumber()-i->GetLength()){ - std::stringstream ss; - ss << "Tried to clear gap "<<*i<<" but"<<gap; - ss << " is only overlapping not fully enclosing!"; - throw promod3::Error(ss.str()); - } - i = mhandle.gaps.erase(i); - next_gap = i-mhandle.gaps.begin(); - continue; - } - } - else if(i->IsCTerminal()){ - if(gap.after.GetNumber() > i->before.GetNumber()){ - if(gap.before.GetNumber() > i->before.GetNumber()){ - std::stringstream ss; - ss << "Tried to clear gap "<<*i<<" but"<<gap; - ss << " is only overlapping not fully enclosing!"; - } - if(gap.after.GetNumber() < i->before.GetNumber() + i->GetLength()){ - std::stringstream ss; - ss << "Tried to clear gap "<<*i<<" but"<<gap; - ss << " is only overlapping not fully enclosing!"; - throw promod3::Error(ss.str()); - } - i = mhandle.gaps.erase(i); - next_gap = i-mhandle.gaps.begin(); - continue; - } - }else{ - if(i->before.GetNumber() >= gap.before.GetNumber() && - i->after.GetNumber() <= gap.after.GetNumber()){ - i = mhandle.gaps.erase(i); - next_gap = i-mhandle.gaps.begin(); - continue; - } - if(i->before.GetNumber() > gap.before.GetNumber() && - i->before.GetNumber() < gap.after.GetNumber()){ - std::stringstream ss; - ss << "Tried to clear gap "<<*i<<" but"<<gap; - ss << " is only overlapping not fully enclosing!"; - throw promod3::Error(ss.str()); + // check full overlap + if (b_res >= my_b_res && a_res <= my_a_res) { + i = mhandle.gaps.erase(i); + if (i == mhandle.gaps.end()) { + next_gap = -1; + } else { + next_gap = i - mhandle.gaps.begin(); } - if(i->after.GetNumber() > gap.before.GetNumber() && - i->after.GetNumber() < gap.after.GetNumber()){ + } else { + // check partial overlaps + if ( (my_b_res > b_res && my_b_res < a_res) + || (my_a_res > b_res && my_a_res < a_res)) { std::stringstream ss; - ss << "Tried to clear gap "<<*i<<" but"<<gap; - ss << " is only overlapping not fully enclosing!"; - throw promod3::Error(ss.str()); + ss << "Tried to clear gap " << *i << " but " << gap + << " is only overlapping not fully enclosing!"; + throw promod3::Error(ss.str()); } + // moving on + ++i; } - ++i; } - return next_gap; } diff --git a/modelling/src/model.hh b/modelling/src/model.hh index 26f792b0..8e7be7d2 100644 --- a/modelling/src/model.hh +++ b/modelling/src/model.hh @@ -32,24 +32,13 @@ struct ModellingHandle { ost::seq::SequenceList seqres; }; -/// \brief Removes all gaps from mhandle which are fully enclosed by given gap. -/// -/// \returns Index of next gap in mhandle.gaps after removal. -/// Returns -1 if last gap was removed. -/// -/// \throws promod3::Error if any gap in mhandle.gaps is only partially -/// enclosed by given gap. +// see Python doc int ClearGaps(ModellingHandle& mhandle, const StructuralGap& gap); -/// \brief Merges two gaps mhandle.gaps[index] and mhandle.gaps[index+1]. -/// -/// \throws promod3::Error if indices out of range or if trying to merge -/// N-terminal gap with a C-terminal gap. +// see Python doc void MergeGaps(ModellingHandle& mhandle, uint index); -/// \brief Removes all gaps in mhandle which are at N-terminal or C-terminal. -/// -/// \returns Number of gaps which were removed. +// see Python doc int RemoveTerminalGaps(ModellingHandle& mhandle); /// \brief copies all atom of src_res to dst_res diff --git a/modelling/tests/test_gap_extension.py b/modelling/tests/test_gap_extension.py index 49595437..3281293a 100644 --- a/modelling/tests/test_gap_extension.py +++ b/modelling/tests/test_gap_extension.py @@ -82,6 +82,38 @@ class GapExtension(unittest.TestCase): self.assertFalse(ext.Extend()) self.checkGap(gap, 4, 8, 'TLN') + def testFullGapExtenderMerge(self): + mhandle = self.getRawModel() + modelling.MergeGaps(mhandle, 1) + # use first gap + gap = mhandle.gaps[0].Copy() + self.checkGap(gap, 4, 8, 'TLN') + # setup extender + ext = modelling.FullGapExtender(gap, mhandle.seqres[0]) + self.assertTrue(ext.Extend()) + self.checkGap(gap, 3, 8, 'ATLN') + self.assertTrue(ext.Extend()) + self.checkGap(gap, 4, 9, 'TLNG') + self.assertTrue(ext.Extend()) + self.checkGap(gap, 2, 8, 'AATLN') + self.assertTrue(ext.Extend()) + self.checkGap(gap, 3, 9, 'ATLNG') + self.assertTrue(ext.Extend()) + self.checkGap(gap, 1, 8, 'AAATLN') + self.assertTrue(ext.Extend()) + self.checkGap(gap, 2, 9, 'AATLNG') + self.assertTrue(ext.Extend()) + self.checkGap(gap, 1, 9, 'AAATLNG') + self.assertTrue(ext.Extend()) + self.checkGap(gap, 4, 19, 'TLNGFTVPAGNTLV') + self.assertTrue(ext.Extend()) + self.checkGap(gap, 3, 19, 'ATLNGFTVPAGNTLV') + # check rest + exp_max_length = mhandle.seqres[0].GetLength()-3 + while ext.Extend(): + self.assertLessEqual(gap.length, exp_max_length) + self.assertEqual(gap.length, exp_max_length) + def testScoringGapExtender(self): mhandle = self.getRawModel() seq_length = mhandle.seqres[0].GetLength() diff --git a/modelling/tests/test_modelling.py b/modelling/tests/test_modelling.py index 2e6eff48..909fc2db 100644 --- a/modelling/tests/test_modelling.py +++ b/modelling/tests/test_modelling.py @@ -106,6 +106,157 @@ class ModellingTests(unittest.TestCase): self.assertTrue(residues[2].FindAtom("CB").IsValid()) self.assertTrue(residues[3].FindAtom("CB").IsValid()) + def testMergeGaps(self): + tpl = io.LoadPDB('data/1mcg.pdb') + aln = seq.CreateAlignment(seq.CreateSequence('trg', 'DDFAGTHN'), + seq.CreateSequence('tpl', 'N-N-A-LF')) + aln.AttachView(1, tpl.CreateFullView()) + mhandle = modelling.BuildRawModel(aln) + # check + seqres = ''.join([r.one_letter_code for r in mhandle.model.residues]) + self.assertEqual(seqres, 'DFGHN') + self.assertEqual(len(mhandle.gaps), 3) + modelling.MergeGaps(mhandle, 0) + seqres = ''.join([r.one_letter_code for r in mhandle.model.residues]) + self.assertEqual(seqres, 'DGHN') + self.assertEqual(len(mhandle.gaps), 2) + self.assertEqual(str(mhandle.gaps[0]), 'A.ASP1-(DFA)-A.GLY5') + modelling.MergeGaps(mhandle, 0) + seqres = ''.join([r.one_letter_code for r in mhandle.model.residues]) + self.assertEqual(seqres, 'DHN') + self.assertEqual(len(mhandle.gaps), 1) + self.assertEqual(str(mhandle.gaps[0]), 'A.ASP1-(DFAGT)-A.HIS7') + with self.assertRaises(RuntimeError): + modelling.MergeGaps(mhandle, 0) + + def testClearGapsExceptions(self): + tpl = io.LoadPDB('data/1mcg.pdb') + aln = seq.CreateAlignment(seq.CreateSequence('trg', 'DDFAGTHN'), + seq.CreateSequence('tpl', 'N-N-A-LF')) + aln.AttachView(1, tpl.CreateFullView()) + mhandle = modelling.BuildRawModel(aln) + # get gaps before we remove residues in MergeGaps + mych = mhandle.model.chains[0] + mygap = modelling.StructuralGap(mych.FindResidue(0), + mych.FindResidue(3), "DD") + mygap2 = modelling.StructuralGap(mych.FindResidue(3), + mych.FindResidue(9), "AGTHN") + mygap3 = modelling.StructuralGap(mych.FindResidue(3), + mych.FindResidue(5), "A") + # check + modelling.MergeGaps(mhandle, 0) + modelling.MergeGaps(mhandle, 0) + self.assertEqual(len(mhandle.gaps), 1) + with self.assertRaises(RuntimeError): + modelling.ClearGaps(mhandle, mygap) + with self.assertRaises(RuntimeError): + modelling.ClearGaps(mhandle, mygap2) + with self.assertRaises(RuntimeError): + modelling.ClearGaps(mhandle, mygap3) + self.assertEqual(len(mhandle.gaps), 1) + + def testClearGaps(self): + tpl = io.LoadPDB('data/2dbs.pdb') + aln = seq.CreateAlignment( + seq.CreateSequence('trg', 'TLNGFTVPAGNTLVLN---PDKG--ATVTM-A'), + seq.CreateSequence('tpl', 'N-GG-TLLI--PNGTYHFLGIQMKSNVHIRVE')) + aln.AttachView(1, tpl.CreateFullView()) + mhandle = modelling.BuildRawModel(aln) + # check + self.assertEqual(len(mhandle.gaps), 6) + self.assertEqual(modelling.ClearGaps(mhandle, mhandle.gaps[1]), 1) + self.assertEqual(len(mhandle.gaps), 5) + self.assertEqual(modelling.ClearGaps(mhandle, mhandle.gaps[2]), 2) + self.assertEqual(len(mhandle.gaps), 4) + # special gaps + mych = mhandle.model.chains[0] + mygap = modelling.StructuralGap(mych.FindResidue(8), + mych.FindResidue(13), "AGNT") + self.assertEqual(modelling.ClearGaps(mhandle, mygap), 1) + self.assertEqual(len(mhandle.gaps), 3) + mygap = modelling.StructuralGap(mych.FindResidue(19), + mych.FindResidue(22), "GA") + self.assertEqual(modelling.ClearGaps(mhandle, mygap), 1) + self.assertEqual(len(mhandle.gaps), 2) + mygap = modelling.StructuralGap(mych.FindResidue(25), + mych.FindResidue(27), "A") + self.assertEqual(modelling.ClearGaps(mhandle, mygap), -1) + self.assertEqual(len(mhandle.gaps), 1) + mygap = modelling.StructuralGap(mych.FindResidue(0), + mych.FindResidue(3), "TL") + self.assertEqual(modelling.ClearGaps(mhandle, mygap), -1) + self.assertEqual(len(mhandle.gaps), 0) + self.assertEqual(modelling.ClearGaps(mhandle, mygap), -1) + + def testClearMultipleGaps(self): + tpl = io.LoadPDB('data/2dbs.pdb') + aln = seq.CreateAlignment( + seq.CreateSequence('trg', 'TLNGFTVPAGNTLVLN---PD--KG---ATVTMA'), + seq.CreateSequence('tpl', 'NGG--TL--LI--PNGTYHFLGIQMKSNVHIRVE')) + aln.AttachView(1, tpl.CreateFullView()) + mhandle = modelling.BuildRawModel(aln) + # check + self.assertEqual(len(mhandle.gaps), 6) + mych = mhandle.model.chains[0] + mygap = modelling.StructuralGap(mych.FindResidue(1), + mych.FindResidue(10), "LNGFTVPA") + self.assertEqual(modelling.ClearGaps(mhandle, mygap), 0) + self.assertEqual(len(mhandle.gaps), 4) + mygap = modelling.StructuralGap(mych.FindResidue(11), + mych.FindResidue(17), "TLVLN") + self.assertEqual(modelling.ClearGaps(mhandle, mygap), 0) + self.assertEqual(len(mhandle.gaps), 2) + mygap = modelling.StructuralGap(mych.FindResidue(18), + mych.FindResidue(23), "KGAT") + self.assertEqual(modelling.ClearGaps(mhandle, mygap), -1) + self.assertEqual(len(mhandle.gaps), 0) + + def testClearGapsTermnini(self): + tpl = io.LoadPDB('data/1mcg.pdb') + aln = seq.CreateAlignment(seq.CreateSequence('trg', 'DDFAGTHN'), + seq.CreateSequence('tpl', '--NNALF-')) + aln.AttachView(1, tpl.CreateFullView()) + mhandle = modelling.BuildRawModel(aln) + # check + self.assertEqual(len(mhandle.gaps), 2) + self.assertEqual(modelling.ClearGaps(mhandle, mhandle.gaps[0]), 0) + self.assertEqual(len(mhandle.gaps), 1) + self.assertEqual(modelling.ClearGaps(mhandle, mhandle.gaps[0]), -1) + self.assertEqual(len(mhandle.gaps), 0) + + def testRemoveTerminalGaps(self): + tpl = io.LoadPDB('data/1mcg.pdb') + aln = seq.CreateAlignment(seq.CreateSequence('trg', 'DDFAGTHN'), + seq.CreateSequence('tpl', '--NNALF-')) + aln.AttachView(1, tpl.CreateFullView()) + mhandle = modelling.BuildRawModel(aln) + # check + self.assertEqual(len(mhandle.gaps), 2) + self.assertEqual(modelling.RemoveTerminalGaps(mhandle), 2) + self.assertEqual(len(mhandle.gaps), 0) + + def testRemoveTerminalGapsN(self): + tpl = io.LoadPDB('data/1mcg.pdb') + aln = seq.CreateAlignment(seq.CreateSequence('trg', 'DDFAGTH'), + seq.CreateSequence('tpl', '--NNALF')) + aln.AttachView(1, tpl.CreateFullView()) + mhandle = modelling.BuildRawModel(aln) + # check + self.assertEqual(len(mhandle.gaps), 1) + self.assertEqual(modelling.RemoveTerminalGaps(mhandle), 1) + self.assertEqual(len(mhandle.gaps), 0) + + def testRemoveTerminalGapsC(self): + tpl = io.LoadPDB('data/1mcg.pdb') + aln = seq.CreateAlignment(seq.CreateSequence('trg', 'FAGTHN'), + seq.CreateSequence('tpl', 'NNALF-')) + aln.AttachView(1, tpl.CreateFullView()) + mhandle = modelling.BuildRawModel(aln) + # check + self.assertEqual(len(mhandle.gaps), 1) + self.assertEqual(modelling.RemoveTerminalGaps(mhandle), 1) + self.assertEqual(len(mhandle.gaps), 0) + if __name__ == "__main__": from ost import testutils -- GitLab