diff --git a/modelling/src/model.cc b/modelling/src/model.cc index 57c0fe4523be5413b27d0aac638de19b7f2b2f0a..2eafa9df60de5b57606242400f8ab632bc160d28 100644 --- a/modelling/src/model.cc +++ b/modelling/src/model.cc @@ -1024,19 +1024,29 @@ void BuildRawChain(const seq::AlignmentHandle& aln, continue; } const char src_olc = src_res.GetOneLetterCode(); + // sanity check that src_res matches aligned sequence if (src_olc != col[1]) { std::stringstream ss; ss << "Alignment-structure mismatch at pos " << i << " in chain " << chain_name << ", alignment is '" << col[1] << "' structure residue is '" << src_olc << "'"; - throw promod3::Error(ss.str()); + // We often have non-standard AA residues changing OLC mapping over time. + // This can make it likely for this check to fail and hence we only + // check the 20 standard amino acids. + if (conop::ResidueToAminoAcid(src_res.GetHandle()) == conop::XXX) { + ss << ". Ok for modified residue " << src_res.GetName() << "."; + LOG_WARNING(ss.str()) + } else { + ss << ". Not ok for standard amino acid " << src_res.GetName() << "!"; + throw promod3::Error(ss.str()); + } } // remove atoms with conflicting positions (i.e. on top of each other) CleanupAtomConflicts(src_res); // check for complete backbone, or in case of Calpha only model building, // if the src_res has a Calpha atom - if(!CheckBackboneAtoms(src_res)){ + if (!CheckBackboneAtoms(src_res)) { LOG_INFO(src_res << " has incomplete backbone. skipping"); gap_seq += trg_olc; continue; diff --git a/modelling/tests/test_modelling.py b/modelling/tests/test_modelling.py index 31cb60c4bad109b81eaf7a2795cad1cf9ce85d9b..0cd6b31046f3bf433cfd5cdcdb76ee4790f95c75 100644 --- a/modelling/tests/test_modelling.py +++ b/modelling/tests/test_modelling.py @@ -149,6 +149,28 @@ class ModellingTests(unittest.TestCase): self.assertTrue(residues[0].FindAtom("CB")) self.assertTrue(residues[0].FindAtom("OG")) + def testModifiedMismatch(self): + # test if we allow OLC mismatch for modified AA + tpl = io.LoadPDB('data/sep.pdb') + aln = seq.CreateAlignment( + seq.CreateSequence('trg', 'S'), + seq.CreateSequence('tpl', 'X')) + aln.AttachView(1, tpl.CreateFullView()) + result = modelling.BuildRawModel(aln) + residues = result.model.residues + # same as before as OLC of SEP is 'S' (i.e. matches) + self.assertEqual(len(residues), 1) + self.assertEqual(len(residues[0].atoms), 6) + self.assertTrue(residues[0].FindAtom("N")) + self.assertTrue(residues[0].FindAtom("CA")) + self.assertTrue(residues[0].FindAtom("C")) + self.assertTrue(residues[0].FindAtom("O")) + self.assertTrue(residues[0].FindAtom("CB")) + self.assertTrue(residues[0].FindAtom("OG")) + # NOTE: relevant seq-vs-str mismatch tested in testOffset + # See OST's nonstandard.cc for additional tests of handling modified + # residues. Code duplication will be removed in SCHWED-3569. + def testInsertCBeta(self): # test if the dst residues contain cbeta, unless they are glycines tpl = io.LoadPDB('data/cbeta.pdb')