diff --git a/rawmodel/pymod/_closegaps.py b/rawmodel/pymod/_closegaps.py index db634b37f0c2197fd06d1427fd20c5b730fe1c78..4b024ec0e7f524ecdd6c273adf77fac6f3a2ef41 100644 --- a/rawmodel/pymod/_closegaps.py +++ b/rawmodel/pymod/_closegaps.py @@ -160,9 +160,34 @@ def _MergeGapsByDistance(self, distance): Check if two neighbouring gaps are at max. *distance* residues apart from each other. Then delete the residues and store a new gap spanning the whole stretch of original gaps and the deleted region. Original gaps will be - removed. + removed. Stem residues count to the gap, so **A-A-A** has a distance of 0. - EXAMPLE! + .. testcode:: mergegapsbydist + :hide: + + from promod3 import rawmodel + + tpl = ost.io.LoadPDB('../tests/rawmodel/data/raw-modeling/1mcg.pdb') + aln = ost.seq.CreateAlignment(ost.seq.CreateSequence('trg', + 'DDFAGDTKNLGHN'), + ost.seq.CreateSequence('tpl', + 'NN----A----LF')) + aln.AttachView(1, tpl.CreateFullView()) + rmodel = rawmodel.BuildRawModel(aln) + assert len(rmodel.gaps) == 2 + rmodel.MergeGapsByDistance(0) + assert len(rmodel.gaps) == 1 + + .. doctest:: mergegapsbydist + + import ost + from promod3 import rawmodel + + tpl = ost.io.LoadPDB('1mcg.pdb') + aln = ost.io.LoadAlignment('1mcg_aln.fasta') + aln.AttachView(1, tpl.CreateFullView()) + rmodel = rawmodel.BuildRawModel(aln) + rmodel.MergeGapsByDistance(0) :param distance: The max. no. of residues between two gaps up to which merge happens. @@ -170,10 +195,40 @@ def _MergeGapsByDistance(self, distance): :return: Nothing. ''' - ost.LogVerbose(self) - ost.LogVerbose(distance) - - -__all__ = ['_CloseSmallDeletions'] + # IMPORTANT: the assumption is that RawModelingResult stores gaps + # sequentially + + # indicate if we merged gaps and should check for more + try_again = True + + # The number of gaps changes on merge, so we cannot just iterate them. + # If we merged gaps, we do not know if this was the last one so try_again + # is set to True. 
If no more gaps were merged, we stop by leaving try_again + # as False. + while try_again: + try_again = False + # iterate all but the last gap, since we are always looking ahead + for i in range(len(self.gaps) - 1): + current_gap = self.gaps[i] + next_gap = self.gaps[i+1] + # check that we are on the same chain + if current_gap.GetChain() != next_gap.GetChain(): + continue + # no merging of gaps at the end AND the start :) + if current_gap.IsNTerminal() and next_gap.IsCTerminal(): + continue + # get the distance between the gaps + dist = next_gap.before.GetNumber().GetNum() \ + - current_gap.after.GetNumber().GetNum() + if dist <= distance: + # gaps are close enough, combine! combine! + self.MergeGaps(i) + ost.LogVerbose("Merged gap %s and %s into %s" % \ + (current_gap, next_gap, self.gaps[i])) + try_again = True + break + + +__all__ = ['_CloseSmallDeletions', '_MergeGapsByDistance'] # LocalWords: modeling stereochemically param diff --git a/rawmodel/tests/CMakeLists.txt b/rawmodel/tests/CMakeLists.txt index be76c1be0b6a23dd40df9bb6eb284e6a319e5a51..ed172439b2120b3a390a270a8a94e7e08d66d671 100644 --- a/rawmodel/tests/CMakeLists.txt +++ b/rawmodel/tests/CMakeLists.txt @@ -1,7 +1,7 @@ set(RAWMODEL_UNIT_TESTS test_raw_modeling.py test_gap_extension.py - test_closesgaps.py + test_close_gaps.py ) set(RAWMODEL_TEST_DATA @@ -16,6 +16,7 @@ set(RAWMODEL_TEST_DATA data/raw-modeling/del.fasta data/raw-modeling/cbeta.pdb data/raw-modeling/cbeta.fasta + data/raw-modeling/1mcg.pdb ) promod3_unittest(MODULE rawmodel diff --git a/rawmodel/tests/data/raw-modeling/1mcg.pdb b/rawmodel/tests/data/raw-modeling/1mcg.pdb new file mode 100644 index 0000000000000000000000000000000000000000..782dcd93e69fa39f200ea083cfc0a47b4b754379 --- /dev/null +++ b/rawmodel/tests/data/raw-modeling/1mcg.pdb @@ -0,0 +1,78 @@ +HELIX 1 1 ASN A 41 ARG A 52 1 12 +HELIX 2 2 LEU A 223 TYR A 228 1 6 +HELIX 3 3 ARG A 295 VAL A 302 1 8 +HELIX 4 4 ASP A 332 TYR A 342 1 11 +HELIX 5 5 GLN A 369 PHE A 375 1 7 +SHEET 
1 1 1 GLY A 57 ILE A 61 0 +SHEET 2 2 1 GLY A 64 PHE A 68 0 +SHEET 3 3 1 VAL A 77 VAL A 81 0 +SHEET 4 4 1 VAL A 85 PRO A 89 0 +SHEET 5 5 1 PHE A 101 VAL A 103 0 +SHEET 6 6 1 PHE A 112 GLY A 116 0 +SHEET 7 7 1 PHE A 121 ASP A 124 0 +SHEET 8 8 1 PHE A 136 LEU A 138 0 +SHEET 9 9 1 TYR A 144 SER A 147 0 +SHEET 10 10 1 PHE A 149 ASP A 152 0 +SHEET 11 11 1 ILE A 161 VAL A 163 0 +SHEET 12 12 1 THR A 166 ARG A 168 0 +SHEET 13 13 1 ARG A 171 HIS A 173 0 +SHEET 14 14 1 GLY A 178 GLU A 181 0 +SHEET 15 15 1 ILE A 183 ASN A 186 0 +SHEET 16 16 1 ILE A 195 THR A 197 0 +SHEET 17 17 1 GLY A 199 ASP A 201 0 +SHEET 18 18 1 ILE A 203 ARG A 206 0 +SHEET 19 19 1 LEU A 208 GLU A 211 0 +SHEET 20 20 1 LEU A 216 GLU A 219 0 +SHEET 21 21 1 GLY A 232 ARG A 234 0 +SHEET 22 22 1 ILE A 236 ASP A 239 0 +SHEET 23 23 1 ILE A 241 SER A 244 0 +SHEET 24 24 1 VAL A 250 GLY A 253 0 +SHEET 25 25 1 VAL A 262 THR A 265 0 +SHEET 26 26 1 VAL A 267 VAL A 270 0 +SHEET 27 27 1 VAL A 276 SER A 278 0 +SHEET 28 28 1 VAL A 352 PHE A 355 0 +SHEET 29 29 1 VAL A 357 ARG A 360 0 +SHEET 30 30 1 ALA A 365 LEU A 367 0 +SHEET 31 31 1 TYR A 400 GLY A 402 0 +SHEET 32 32 1 VAL A 408 ASP A 410 0 +ATOM 1170 N ASN A 186 4.019 30.959 20.793 1.00 11.38 N +ATOM 1171 CA ASN A 186 3.429 31.085 19.456 1.00 11.67 C +ATOM 1172 C ASN A 186 3.017 32.515 19.097 1.00 11.36 C +ATOM 1173 O ASN A 186 2.635 33.316 19.975 1.00 9.55 O +ATOM 1174 CB ASN A 186 2.310 30.087 19.262 1.00 11.85 C +ATOM 1175 CG ASN A 186 2.649 28.733 19.852 1.00 14.79 C +ATOM 1176 OD1 ASN A 186 1.924 28.219 20.686 1.00 17.92 O +ATOM 1177 ND2 ASN A 186 3.784 28.161 19.443 1.00 19.24 N +ATOM 1178 N ASN A 187 3.222 32.838 17.819 1.00 12.09 N +ATOM 1179 CA ASN A 187 2.676 34.038 17.169 1.00 13.35 C +ATOM 1180 C ASN A 187 3.048 35.377 17.837 1.00 13.30 C +ATOM 1181 O ASN A 187 2.182 36.129 18.305 1.00 13.67 O +ATOM 1182 CB ASN A 187 1.156 33.861 17.052 1.00 13.63 C +ATOM 1183 CG ASN A 187 0.496 34.969 16.298 1.00 15.82 C +ATOM 1184 OD1 ASN A 187 1.061 35.533 15.353 1.00 17.79 
O
+ATOM 1185 ND2 ASN A 187 -0.723 35.304 16.715 1.00 17.94 N
+ATOM 1186 N ALA A 188 4.347 35.638 17.899 1.00 13.36 N
+ATOM 1187 CA ALA A 188 4.919 36.848 18.497 1.00 13.12 C
+ATOM 1188 C ALA A 188 4.983 37.951 17.450 1.00 13.42 C
+ATOM 1189 O ALA A 188 4.986 37.656 16.241 1.00 13.21 O
+ATOM 1190 CB ALA A 188 6.361 36.560 19.037 1.00 12.49 C
+ATOM 1191 N LEU A 189 5.089 39.207 17.904 1.00 13.18 N
+ATOM 1192 CA LEU A 189 5.305 40.332 16.986 1.00 13.51 C
+ATOM 1193 C LEU A 189 6.774 40.408 16.615 1.00 14.04 C
+ATOM 1194 O LEU A 189 7.617 40.263 17.469 1.00 13.93 O
+ATOM 1195 CB LEU A 189 4.842 41.649 17.627 1.00 13.73 C
+ATOM 1196 CG LEU A 189 4.810 43.000 16.889 1.00 12.27 C
+ATOM 1197 CD1 LEU A 189 4.011 42.955 15.599 1.00 10.44 C
+ATOM 1198 CD2 LEU A 189 4.259 44.029 17.846 1.00 9.81 C
+ATOM 1199 N PHE A 190 7.081 40.622 15.340 1.00 15.37 N
+ATOM 1200 CA PHE A 190 8.475 40.714 14.893 1.00 16.40 C
+ATOM 1201 C PHE A 190 9.443 41.387 15.872 1.00 17.11 C
+ATOM 1202 O PHE A 190 10.514 40.836 16.173 1.00 18.24 O
+ATOM 1203 CB PHE A 190 8.587 41.340 13.499 1.00 16.62 C
+ATOM 1204 CG PHE A 190 8.259 42.798 13.451 1.00 16.71 C
+ATOM 1205 CD1 PHE A 190 6.946 43.216 13.309 1.00 14.70 C
+ATOM 1206 CD2 PHE A 190 9.271 43.754 13.517 1.00 15.08 C
+ATOM 1207 CE1 PHE A 190 6.638 44.580 13.244 1.00 16.59 C
+ATOM 1208 CE2 PHE A 190 8.975 45.127 13.463 1.00 15.55 C
+ATOM 1209 CZ PHE A 190 7.664 45.544 13.337 1.00 16.66 C
+END
diff --git a/rawmodel/tests/test_close_gaps.py b/rawmodel/tests/test_close_gaps.py
new file mode 100644
index 0000000000000000000000000000000000000000..7039556075bd6caeddcfb02e97e8cf672e87e0e8
--- /dev/null
+++ b/rawmodel/tests/test_close_gaps.py
@@ -0,0 +1,104 @@
+import unittest
+import ost
+from ost import io, seq
+from promod3 import rawmodel
+from promod3 import loop
+
+# setting up an OST LogSink to capture messages
+class _FetchLog(ost.LogSink):
+    '''Log sink storing the messages it receives, keyed by level name.'''
+    def __init__(self):
+        ost.LogSink.__init__(self)
+        # level name (e.g. 'VERBOSE') -> list of stripped messages
+        self.messages = dict()
+
+    def LogMessage(self, message, severity):
+        # NOTE(review): the order of names is assumed to match the numeric
+        # severities handed in by OST - confirm against the OST logging docs
+        levels = ['ERROR', 'WARNING', 'DEBUG', 'INFO', 'VERBOSE', 'TRACE']
+        level = levels[severity]
+        if level not in self.messages:
+            self.messages[level] = list()
+        self.messages[level].append(message.strip())
+
+class CloseGapsTests(unittest.TestCase):
+    def setUp(self):
+        # fresh sink per test, so each test only sees its own messages
+        self.log = _FetchLog()
+        ost.PushLogSink(self.log)
+        ost.PushVerbosityLevel(4)
+
+    def tearDown(self):
+        # undo setUp so that sinks/levels do not pile up from test to test
+        ost.PopVerbosityLevel()
+        ost.PopLogSink()
+
+    def testClosesmalldel(self):
+        # check that very small gaps are closed
+        # create a rawmodel to work with (which actually is a bit dangerous:
+        # the PDB file has nothing to do with the alignment...)
+        tpl = io.LoadPDB('data/raw-modeling/gly.pdb')
+        # switch target and template in this alignment to get a deletion
+        aln = seq.CreateAlignment(seq.CreateSequence('trg', 'GGG-GGG'),
+                                  seq.CreateSequence('tpl', 'GGGAGGG'))
+        aln.AttachView(1, tpl.CreateFullView())
+        rmodel = rawmodel.BuildRawModel(aln)
+        self.assertEqual(len(rmodel.gaps), 1)
+        # obtain the scorer
+        scorer = loop.SetupBackboneScorer(rmodel)
+        rmodel.CloseSmallDeletions(scorer)
+        self.assertEqual(len(rmodel.gaps), 0)
+        self.assertEqual(self.log.messages['VERBOSE'],
+                         ['Assigning MOL_IDs',
+                          'selected 1 chain(s), 20 residue(s), 80 atom(s) 79 '+
+                          'bond(s)',
+                          'Closed: A.GLY3-()-A.GLY4 by relaxing '+
+                          'A.GLY3-(GG)-A.GLY6'])
+
+    def testMergeGapsByDistance(self):
+        # test that merging two close gaps works
+        tpl = io.LoadPDB('data/raw-modeling/1mcg.pdb')
+        aln = seq.CreateAlignment(seq.CreateSequence('trg', 'DDFAGDTKNLGHN'),
+                                  seq.CreateSequence('tpl', 'NN----A----LF'))
+        aln.AttachView(1, tpl.CreateFullView())
+        rmodel = rawmodel.BuildRawModel(aln)
+        self.assertEqual(len(rmodel.gaps), 2)
+        self.assertEqual(str(rmodel.gaps[0]), 'A.ASP2-(FAGD)-A.THR7')
+        self.assertEqual(str(rmodel.gaps[1]), 'A.THR7-(KNLG)-A.HIS12')
+        rmodel.MergeGapsByDistance(0)
+        self.assertEqual(len(rmodel.gaps), 1)
+        self.assertEqual(str(rmodel.gaps[0]), 'A.ASP2-(FAGDTKNLG)-A.HIS12')
+        self.assertEqual(self.log.messages['VERBOSE'],
+                         ['Assigning MOL_IDs',
+                          'selected 1 chain(s), 5 residue(s), 40 atom(s) 40 '+
+                          'bond(s)',
+                          'Merged gap A.ASP2-(FAGD)-A.THR7 and '+
+                          'A.THR7-(KNLG)-A.HIS12 into '+
+                          'A.ASP2-(FAGDTKNLG)-A.HIS12'])
+
+    def testMergeGapsByDistanceBothTerminals(self):
+        # test that we do not delete the whole thing for gaps at terminals
+        tpl = io.LoadPDB('data/raw-modeling/1mcg.pdb')
+        aln = seq.CreateAlignment(seq.CreateSequence('trg', 'DDFAGDTKNLGHN'),
+                                  seq.CreateSequence('tpl', '----NNALF----'))
+        aln.AttachView(1, tpl.CreateFullView())
+        rmodel = rawmodel.BuildRawModel(aln)
+        self.assertEqual(len(rmodel.gaps), 2)
+        rmodel.MergeGapsByDistance(4)
+        self.assertEqual(len(rmodel.gaps), 2)
+
+    def testMergeGapsByDistanceOneTerminal(self):
+        # test that a terminal gap is still merged with a close internal gap
+        tpl = io.LoadPDB('data/raw-modeling/1mcg.pdb')
+        aln = seq.CreateAlignment(seq.CreateSequence('trg', 'DDFAGDTKNLGHN'),
+                                  seq.CreateSequence('tpl', 'NN----ALF----'))
+        aln.AttachView(1, tpl.CreateFullView())
+        rmodel = rawmodel.BuildRawModel(aln)
+        self.assertEqual(len(rmodel.gaps), 2)
+        rmodel.MergeGapsByDistance(2)
+        self.assertEqual(len(rmodel.gaps), 1)
+
+
+if __name__ == "__main__":
+    from ost import testutils
+    testutils.RunTests()
diff --git a/rawmodel/tests/test_closesgaps.py b/rawmodel/tests/test_closesgaps.py
deleted file mode 100644
index 2bafd2b48077d753ebeb8dfcf53c61dba6950133..0000000000000000000000000000000000000000
--- a/rawmodel/tests/test_closesgaps.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import unittest
-import ost
-from ost import io, seq
-from promod3 import rawmodel
-from promod3 import loop
-
-# setting up an OST LogSink to capture messages
-class _FetchLog(ost.LogSink):
-    def __init__(self):
-        ost.LogSink.__init__(self)
-        self.messages = dict()
-
-    def LogMessage(self, message, severity):
-        levels = ['ERROR', 'WARNING', 'DEBUG', 'INFO', 'VERBOSE', 'TRACE']
-        level = levels[severity]
-        if not level in self.messages.keys():
-            self.messages[level] = list()
-        self.messages[level].append(message.strip())
-
-class CloseGapsTests(unittest.TestCase):
-    def testClosesmalldel(self):
-        log = _FetchLog()
-        ost.PushVerbosityLevel(4)
-        ost.PushLogSink(log)
-        # create a rawmodel to work with (which actually is a bit dangerous:
-        # the PDB file has nothing to do with the alignment...)
-        tpl = io.LoadPDB('data/raw-modeling/gly.pdb')
-        # switch target and template in this alignment to get a deletion
-        aln = seq.CreateAlignment(seq.CreateSequence('trg', 'GGG-GGG'),
-                                  seq.CreateSequence('tpl', 'GGGAGGG'))
-        aln.AttachView(1, tpl.CreateFullView())
-        rmodel = rawmodel.BuildRawModel(aln)
-        self.assertEqual(len(rmodel.gaps), 1)
-        # obtain the scorer
-        scorer = loop.SetupBackboneScorer(rmodel)
-        rmodel.CloseSmallDeletions(scorer)
-        self.assertEqual(len(rmodel.gaps), 0)
-        self.assertEqual(log.messages['VERBOSE'],
-                         ['Assigning MOL_IDs',
-                          'selected 1 chain(s), 20 residue(s), 80 atom(s) 79 '+
-                          'bond(s)',
-                          'Closed: A.GLY3-()-A.GLY4 by relaxing '+
-                          'A.GLY3-(GG)-A.GLY6'])
-
-if __name__ == "__main__":
-    from ost import testutils
-    testutils.RunTests()