From dd465a80f3d6c8cbebaebc88aaf91ee63f77eac5 Mon Sep 17 00:00:00 2001
From: Stefan Bienert <stefan.bienert@unibas.ch>
Date: Thu, 17 Sep 2015 14:57:08 +0200
Subject: [PATCH] SCHWED-372: Implemented, tested, documented
---
rawmodel/pymod/_closegaps.py | 69 +++++++++++++++--
rawmodel/tests/CMakeLists.txt | 3 +-
rawmodel/tests/data/raw-modeling/1mcg.pdb | 78 +++++++++++++++++++
rawmodel/tests/test_close_gaps.py | 94 +++++++++++++++++++++++
rawmodel/tests/test_closesgaps.py | 47 ------------
5 files changed, 236 insertions(+), 55 deletions(-)
create mode 100644 rawmodel/tests/data/raw-modeling/1mcg.pdb
create mode 100644 rawmodel/tests/test_close_gaps.py
delete mode 100644 rawmodel/tests/test_closesgaps.py
diff --git a/rawmodel/pymod/_closegaps.py b/rawmodel/pymod/_closegaps.py
index db634b37..4b024ec0 100644
--- a/rawmodel/pymod/_closegaps.py
+++ b/rawmodel/pymod/_closegaps.py
@@ -160,9 +160,34 @@ def _MergeGapsByDistance(self, distance):
Check if two neighbouring gaps are at max. *distance* residues apart from
each other. Then delete the residues and store a new gap spanning the whole
stretch of original gaps and the deleted region. Original gaps will be
- removed.
+ removed. Stem residues count as part of their gap, so **A-A-A** has a distance of 0.
- EXAMPLE!
+ .. testcode:: mergegapsbydist
+ :hide:
+
+ from promod3 import rawmodel
+
+ tpl = ost.io.LoadPDB('../tests/rawmodel/data/raw-modeling/1mcg.pdb')
+ aln = ost.seq.CreateAlignment(ost.seq.CreateSequence('trg',
+ 'DDFAGDTKNLGHN'),
+ ost.seq.CreateSequence('tpl',
+ 'NN----A----LF'))
+ aln.AttachView(1, tpl.CreateFullView())
+ rmodel = rawmodel.BuildRawModel(aln)
+ assert len(rmodel.gaps) == 2
+ rmodel.MergeGapsByDistance(0)
+ assert len(rmodel.gaps) == 1
+
+ .. doctest:: mergegapsbydist
+
+ import ost
+ from promod3 import rawmodel
+
+ tpl = ost.io.LoadPDB('1mcg.pdb')
+ aln = ost.io.LoadAlignment('1mcg_aln.fasta')
+ aln.AttachView(1, tpl.CreateFullView())
+ rmodel = rawmodel.BuildRawModel(aln)
+ rmodel.MergeGapsByDistance(0)
:param distance: The max. no. of residues between two gaps up to which
merge happens.
@@ -170,10 +195,40 @@ def _MergeGapsByDistance(self, distance):
:return: Nothing.
'''
- ost.LogVerbose(self)
- ost.LogVerbose(distance)
-
-
-__all__ = ['_CloseSmallDeletions']
+ # IMPORTANT: the assumption is that RawModelingResult stores gaps
+ # sequentially
+
+ # indicate if we merged gaps and should check for more
+ try_again = True
+
+ # The number of gaps changes on merge, so we cannot just iterate them.
+ # If we merged gaps, we do not know if this was the last one so try_again
+ # is set to True. If no more gaps were merged, we stop by leaving try_again
+ # as False.
+ while try_again:
+ try_again = False
+ # iterate all but the last gap, since we are always looking ahead
+ for i in range(len(self.gaps) - 1):
+ current_gap = self.gaps[i]
+ next_gap = self.gaps[i+1]
+ # check that we are on the same chain
+ if current_gap.GetChain() != next_gap.GetChain():
+ continue
+ # never merge an N-terminal gap with a C-terminal gap, that would delete the whole chain
+ if current_gap.IsNTerminal() and next_gap.IsCTerminal():
+ continue
+ # get the distance between the gaps
+ dist = next_gap.before.GetNumber().GetNum() \
+ - current_gap.after.GetNumber().GetNum()
+ if dist <= distance:
+ # gaps are close enough, combine! combine!
+ self.MergeGaps(i)
+ ost.LogVerbose("Merged gap %s and %s into %s" % \
+ (current_gap, next_gap, self.gaps[i]))
+ try_again = True
+ break
+
+
+__all__ = ['_CloseSmallDeletions', '_MergeGapsByDistance']
# LocalWords: modeling stereochemically param
diff --git a/rawmodel/tests/CMakeLists.txt b/rawmodel/tests/CMakeLists.txt
index be76c1be..ed172439 100644
--- a/rawmodel/tests/CMakeLists.txt
+++ b/rawmodel/tests/CMakeLists.txt
@@ -1,7 +1,7 @@
set(RAWMODEL_UNIT_TESTS
test_raw_modeling.py
test_gap_extension.py
- test_closesgaps.py
+ test_close_gaps.py
)
set(RAWMODEL_TEST_DATA
@@ -16,6 +16,7 @@ set(RAWMODEL_TEST_DATA
data/raw-modeling/del.fasta
data/raw-modeling/cbeta.pdb
data/raw-modeling/cbeta.fasta
+ data/raw-modeling/1mcg.pdb
)
promod3_unittest(MODULE rawmodel
diff --git a/rawmodel/tests/data/raw-modeling/1mcg.pdb b/rawmodel/tests/data/raw-modeling/1mcg.pdb
new file mode 100644
index 00000000..782dcd93
--- /dev/null
+++ b/rawmodel/tests/data/raw-modeling/1mcg.pdb
@@ -0,0 +1,78 @@
+HELIX 1 1 ASN A 41 ARG A 52 1 12
+HELIX 2 2 LEU A 223 TYR A 228 1 6
+HELIX 3 3 ARG A 295 VAL A 302 1 8
+HELIX 4 4 ASP A 332 TYR A 342 1 11
+HELIX 5 5 GLN A 369 PHE A 375 1 7
+SHEET 1 1 1 GLY A 57 ILE A 61 0
+SHEET 2 2 1 GLY A 64 PHE A 68 0
+SHEET 3 3 1 VAL A 77 VAL A 81 0
+SHEET 4 4 1 VAL A 85 PRO A 89 0
+SHEET 5 5 1 PHE A 101 VAL A 103 0
+SHEET 6 6 1 PHE A 112 GLY A 116 0
+SHEET 7 7 1 PHE A 121 ASP A 124 0
+SHEET 8 8 1 PHE A 136 LEU A 138 0
+SHEET 9 9 1 TYR A 144 SER A 147 0
+SHEET 10 10 1 PHE A 149 ASP A 152 0
+SHEET 11 11 1 ILE A 161 VAL A 163 0
+SHEET 12 12 1 THR A 166 ARG A 168 0
+SHEET 13 13 1 ARG A 171 HIS A 173 0
+SHEET 14 14 1 GLY A 178 GLU A 181 0
+SHEET 15 15 1 ILE A 183 ASN A 186 0
+SHEET 16 16 1 ILE A 195 THR A 197 0
+SHEET 17 17 1 GLY A 199 ASP A 201 0
+SHEET 18 18 1 ILE A 203 ARG A 206 0
+SHEET 19 19 1 LEU A 208 GLU A 211 0
+SHEET 20 20 1 LEU A 216 GLU A 219 0
+SHEET 21 21 1 GLY A 232 ARG A 234 0
+SHEET 22 22 1 ILE A 236 ASP A 239 0
+SHEET 23 23 1 ILE A 241 SER A 244 0
+SHEET 24 24 1 VAL A 250 GLY A 253 0
+SHEET 25 25 1 VAL A 262 THR A 265 0
+SHEET 26 26 1 VAL A 267 VAL A 270 0
+SHEET 27 27 1 VAL A 276 SER A 278 0
+SHEET 28 28 1 VAL A 352 PHE A 355 0
+SHEET 29 29 1 VAL A 357 ARG A 360 0
+SHEET 30 30 1 ALA A 365 LEU A 367 0
+SHEET 31 31 1 TYR A 400 GLY A 402 0
+SHEET 32 32 1 VAL A 408 ASP A 410 0
+ATOM 1170 N ASN A 186 4.019 30.959 20.793 1.00 11.38 N
+ATOM 1171 CA ASN A 186 3.429 31.085 19.456 1.00 11.67 C
+ATOM 1172 C ASN A 186 3.017 32.515 19.097 1.00 11.36 C
+ATOM 1173 O ASN A 186 2.635 33.316 19.975 1.00 9.55 O
+ATOM 1174 CB ASN A 186 2.310 30.087 19.262 1.00 11.85 C
+ATOM 1175 CG ASN A 186 2.649 28.733 19.852 1.00 14.79 C
+ATOM 1176 OD1 ASN A 186 1.924 28.219 20.686 1.00 17.92 O
+ATOM 1177 ND2 ASN A 186 3.784 28.161 19.443 1.00 19.24 N
+ATOM 1178 N ASN A 187 3.222 32.838 17.819 1.00 12.09 N
+ATOM 1179 CA ASN A 187 2.676 34.038 17.169 1.00 13.35 C
+ATOM 1180 C ASN A 187 3.048 35.377 17.837 1.00 13.30 C
+ATOM 1181 O ASN A 187 2.182 36.129 18.305 1.00 13.67 O
+ATOM 1182 CB ASN A 187 1.156 33.861 17.052 1.00 13.63 C
+ATOM 1183 CG ASN A 187 0.496 34.969 16.298 1.00 15.82 C
+ATOM 1184 OD1 ASN A 187 1.061 35.533 15.353 1.00 17.79 O
+ATOM 1185 ND2 ASN A 187 -0.723 35.304 16.715 1.00 17.94 N
+ATOM 1186 N ALA A 188 4.347 35.638 17.899 1.00 13.36 N
+ATOM 1187 CA ALA A 188 4.919 36.848 18.497 1.00 13.12 C
+ATOM 1188 C ALA A 188 4.983 37.951 17.450 1.00 13.42 C
+ATOM 1189 O ALA A 188 4.986 37.656 16.241 1.00 13.21 O
+ATOM 1190 CB ALA A 188 6.361 36.560 19.037 1.00 12.49 C
+ATOM 1191 N LEU A 189 5.089 39.207 17.904 1.00 13.18 N
+ATOM 1192 CA LEU A 189 5.305 40.332 16.986 1.00 13.51 C
+ATOM 1193 C LEU A 189 6.774 40.408 16.615 1.00 14.04 C
+ATOM 1194 O LEU A 189 7.617 40.263 17.469 1.00 13.93 O
+ATOM 1195 CB LEU A 189 4.842 41.649 17.627 1.00 13.73 C
+ATOM 1196 CG LEU A 189 4.810 43.000 16.889 1.00 12.27 C
+ATOM 1197 CD1 LEU A 189 4.011 42.955 15.599 1.00 10.44 C
+ATOM 1198 CD2 LEU A 189 4.259 44.029 17.846 1.00 9.81 C
+ATOM 1199 N PHE A 190 7.081 40.622 15.340 1.00 15.37 N
+ATOM 1200 CA PHE A 190 8.475 40.714 14.893 1.00 16.40 C
+ATOM 1201 C PHE A 190 9.443 41.387 15.872 1.00 17.11 C
+ATOM 1202 O PHE A 190 10.514 40.836 16.173 1.00 18.24 O
+ATOM 1203 CB PHE A 190 8.587 41.340 13.499 1.00 16.62 C
+ATOM 1204 CG PHE A 190 8.259 42.798 13.451 1.00 16.71 C
+ATOM 1205 CD1 PHE A 190 6.946 43.216 13.309 1.00 14.70 C
+ATOM 1206 CD2 PHE A 190 9.271 43.754 13.517 1.00 15.08 C
+ATOM 1207 CE1 PHE A 190 6.638 44.580 13.244 1.00 16.59 C
+ATOM 1208 CE2 PHE A 190 8.975 45.127 13.463 1.00 15.55 C
+ATOM 1209 CZ PHE A 190 7.664 45.544 13.337 1.00 16.66 C
+END
diff --git a/rawmodel/tests/test_close_gaps.py b/rawmodel/tests/test_close_gaps.py
new file mode 100644
index 00000000..70395560
--- /dev/null
+++ b/rawmodel/tests/test_close_gaps.py
@@ -0,0 +1,94 @@
+import unittest
+import ost
+from ost import io, seq
+from promod3 import rawmodel
+from promod3 import loop
+
+# setting up an OST LogSink to capture messages
+class _FetchLog(ost.LogSink):
+ def __init__(self):
+ ost.LogSink.__init__(self)
+ self.messages = dict()
+
+ def LogMessage(self, message, severity):
+ levels = ['ERROR', 'WARNING', 'DEBUG', 'INFO', 'VERBOSE', 'TRACE']
+ level = levels[severity]
+ if not level in self.messages.keys():
+ self.messages[level] = list()
+ self.messages[level].append(message.strip())
+
+class CloseGapsTests(unittest.TestCase):
+ def setUp(self):
+ self.log = _FetchLog()
+ ost.PushLogSink(self.log)
+ ost.PushVerbosityLevel(4)
+
+ def testClosesmalldel(self):
+ # check that very small gaps are closed
+ # create a rawmodel to work with (which actually is a bit dangerous:
+ # the PDB file has nothing to do with the alignment...)
+ tpl = io.LoadPDB('data/raw-modeling/gly.pdb')
+ # switch target and template in this alignment to get a deletion
+ aln = seq.CreateAlignment(seq.CreateSequence('trg', 'GGG-GGG'),
+ seq.CreateSequence('tpl', 'GGGAGGG'))
+ aln.AttachView(1, tpl.CreateFullView())
+ rmodel = rawmodel.BuildRawModel(aln)
+ self.assertEqual(len(rmodel.gaps), 1)
+ # obtain the scorer
+ scorer = loop.SetupBackboneScorer(rmodel)
+ rmodel.CloseSmallDeletions(scorer)
+ self.assertEqual(len(rmodel.gaps), 0)
+ self.assertEqual(self.log.messages['VERBOSE'],
+ ['Assigning MOL_IDs',
+ 'selected 1 chain(s), 20 residue(s), 80 atom(s) 79 '+
+ 'bond(s)',
+ 'Closed: A.GLY3-()-A.GLY4 by relaxing '+
+ 'A.GLY3-(GG)-A.GLY6'])
+
+ def testMergeGapsByDistance(self):
+ # test that merging two close gaps works
+ tpl = io.LoadPDB('data/raw-modeling/1mcg.pdb')
+ aln = seq.CreateAlignment(seq.CreateSequence('trg', 'DDFAGDTKNLGHN'),
+ seq.CreateSequence('tpl', 'NN----A----LF'))
+ aln.AttachView(1, tpl.CreateFullView())
+ rmodel = rawmodel.BuildRawModel(aln)
+ self.assertEqual(len(rmodel.gaps), 2)
+ self.assertEqual(str(rmodel.gaps[0]), 'A.ASP2-(FAGD)-A.THR7')
+ self.assertEqual(str(rmodel.gaps[1]), 'A.THR7-(KNLG)-A.HIS12')
+ rmodel.MergeGapsByDistance(0)
+ self.assertEqual(len(rmodel.gaps), 1)
+ self.assertEqual(str(rmodel.gaps[0]), 'A.ASP2-(FAGDTKNLG)-A.HIS12')
+ self.assertEqual(self.log.messages['VERBOSE'],
+ ['Assigning MOL_IDs',
+ 'selected 1 chain(s), 5 residue(s), 40 atom(s) 40 '+
+ 'bond(s)',
+ 'Merged gap A.ASP2-(FAGD)-A.THR7 and '+
+ 'A.THR7-(KNLG)-A.HIS12 into '+
+ 'A.ASP2-(FAGDTKNLG)-A.HIS12'])
+
+ def testMergeGapsByDistanceBothTerminals(self):
+ # test that we do not delete the whole thing for gaps at terminals
+ tpl = io.LoadPDB('data/raw-modeling/1mcg.pdb')
+ aln = seq.CreateAlignment(seq.CreateSequence('trg', 'DDFAGDTKNLGHN'),
+ seq.CreateSequence('tpl', '----NNALF----'))
+ aln.AttachView(1, tpl.CreateFullView())
+ rmodel = rawmodel.BuildRawModel(aln)
+ self.assertEqual(len(rmodel.gaps), 2)
+ rmodel.MergeGapsByDistance(4)
+ self.assertEqual(len(rmodel.gaps), 2)
+
+ def testMergeGapsByDistanceOneTerminal(self):
+ # test that gaps are still merged if only one of them is at a terminal
+ tpl = io.LoadPDB('data/raw-modeling/1mcg.pdb')
+ aln = seq.CreateAlignment(seq.CreateSequence('trg', 'DDFAGDTKNLGHN'),
+ seq.CreateSequence('tpl', 'NN----ALF----'))
+ aln.AttachView(1, tpl.CreateFullView())
+ rmodel = rawmodel.BuildRawModel(aln)
+ self.assertEqual(len(rmodel.gaps), 2)
+ rmodel.MergeGapsByDistance(2)
+ self.assertEqual(len(rmodel.gaps), 1)
+
+
+if __name__ == "__main__":
+ from ost import testutils
+ testutils.RunTests()
diff --git a/rawmodel/tests/test_closesgaps.py b/rawmodel/tests/test_closesgaps.py
deleted file mode 100644
index 2bafd2b4..00000000
--- a/rawmodel/tests/test_closesgaps.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import unittest
-import ost
-from ost import io, seq
-from promod3 import rawmodel
-from promod3 import loop
-
-# setting up an OST LogSink to capture messages
-class _FetchLog(ost.LogSink):
- def __init__(self):
- ost.LogSink.__init__(self)
- self.messages = dict()
-
- def LogMessage(self, message, severity):
- levels = ['ERROR', 'WARNING', 'DEBUG', 'INFO', 'VERBOSE', 'TRACE']
- level = levels[severity]
- if not level in self.messages.keys():
- self.messages[level] = list()
- self.messages[level].append(message.strip())
-
-class CloseGapsTests(unittest.TestCase):
- def testClosesmalldel(self):
- log = _FetchLog()
- ost.PushVerbosityLevel(4)
- ost.PushLogSink(log)
- # create a rawmodel to work with (which actually is a bit dangerous:
- # the PDB file has nothing to do with the alignment...)
- tpl = io.LoadPDB('data/raw-modeling/gly.pdb')
- # switch target and template in this alignment to get a deletion
- aln = seq.CreateAlignment(seq.CreateSequence('trg', 'GGG-GGG'),
- seq.CreateSequence('tpl', 'GGGAGGG'))
- aln.AttachView(1, tpl.CreateFullView())
- rmodel = rawmodel.BuildRawModel(aln)
- self.assertEqual(len(rmodel.gaps), 1)
- # obtain the scorer
- scorer = loop.SetupBackboneScorer(rmodel)
- rmodel.CloseSmallDeletions(scorer)
- self.assertEqual(len(rmodel.gaps), 0)
- self.assertEqual(log.messages['VERBOSE'],
- ['Assigning MOL_IDs',
- 'selected 1 chain(s), 20 residue(s), 80 atom(s) 79 '+
- 'bond(s)',
- 'Closed: A.GLY3-()-A.GLY4 by relaxing '+
- 'A.GLY3-(GG)-A.GLY6'])
-
-if __name__ == "__main__":
- from ost import testutils
- testutils.RunTests()
--
GitLab