# test_pipeline.py

"""
Unit tests for modelling pipeline components.
Starting from crambin with an artificially cut-out segment, the tests
reconstruct that same protein.
"""
import unittest
from promod3 import loop, modelling
from ost import io, mol

################################################################
# Code to generate data/1crn_cut.pdb and data/1crn.fasta:
################################################################
# from ost import io,mol,seq
# # load protein
# prot = io.LoadPDB('1crn', remote=True)
# # get only amino acids
# prot = mol.CreateEntityFromView(prot.Select("peptide=true"), True)
# # get sequence
# seqres = ''.join([r.one_letter_code for r in prot.residues])
# # cut out stuff for fake pdb file
# prot_cut = mol.CreateEntityFromView(prot.Select("rnum < 25 or rnum > 30"), True)
# io.SavePDB(prot_cut, "data/1crn_cut.pdb")
# seqres_cut = seqres[:24] + '-'*6 + seqres[30:]
# # create alignment and dump
# aln = seq.CreateAlignment(
#     seq.CreateSequence('trg', seqres),
#     seq.CreateSequence('tpl', seqres_cut))
# io.SaveAlignment(aln, "data/1crn.fasta")
################################################################
# Code to generate reference solutions:
################################################################
# from ost import io
# from promod3 import loop,modelling
# # create raw model
# tpl = io.LoadPDB('data/1crn_cut.pdb')
# aln = io.LoadAlignment('data/1crn.fasta')
# aln.AttachView(1, tpl.CreateFullView())
# mhandle = modelling.BuildRawModel(aln)
# # do it all
# final_model = modelling.BuildFromRawModel(mhandle)
# io.SavePDB(final_model, 'data/1crn_build.pdb')
# # step-by-step - loop
# mhandle = modelling.BuildRawModel(aln)
# scorer = modelling.SetupBackboneScorer(mhandle)
# frag_db = loop.LoadFragDB()
# structure_db = loop.LoadStructureDB()
# torsion_sampler = loop.LoadTorsionSamplerCoil()
# modelling.FillLoopsByDatabase(mhandle, scorer, frag_db,
#                               structure_db,
#                               torsion_sampler)
# io.SavePDB(mhandle.model, 'data/1crn_rec.pdb')
# # step-by-step - sidechains
# mhandle.model = io.LoadPDB('data/1crn_rec.pdb')
# modelling.BuildSidechains(mhandle)
# io.SavePDB(mhandle.model, 'data/1crn_sc.pdb')
# # step-by-step - energy minimization
# mhandle.model = io.LoadPDB('data/1crn_sc.pdb')
# modelling.MinimizeModelEnergy(mhandle)
# io.SavePDB(mhandle.model, 'data/1crn_final.pdb')
################################################################

class PipelineTests(unittest.TestCase):
    '''Tests of single modelling pipeline steps and of the full pipeline.

    Results are checked against reference structures read from PDB files
    below data/ (paths are relative to the current working directory).
    '''

    @classmethod
    def setUpClass(cls):
        '''Load the loop modelling resources once for all tests.

        The objects returned by loop.LoadFragDB(), loop.LoadStructureDB()
        and loop.LoadTorsionSamplerCoil() are stored as class attributes
        (frag_db, structure_db, torsion_sampler).
        '''
        cls.frag_db = loop.LoadFragDB()
        cls.structure_db = loop.LoadStructureDB()
        cls.torsion_sampler = loop.LoadTorsionSamplerCoil()

    def compare(self, model, filename, delta=0.01):
        '''Assert that model matches the structure stored in filename.

        The reference is loaded from filename and passed together with
        model to mol.alg.Superpose; the test fails unless the reported
        RMSD is below delta.
        '''
        reference = io.LoadPDB(filename)
        superposition = mol.alg.Superpose(reference, model)
        self.assertLess(superposition.rmsd, delta)

    def getRawModel(self):
        '''Return a raw model built from the 1crn_cut template and alignment.

        Callers may overwrite the model stored in the returned handle.
        '''
        template = io.LoadPDB('data/1crn_cut.pdb')
        alignment = io.LoadAlignment('data/1crn.fasta')
        # sequence 1 of the alignment gets the template structure attached
        alignment.AttachView(1, template.CreateFullView())
        return modelling.BuildRawModel(alignment)

    def _getRawModelWithStructure(self, filename):
        '''Return the default raw model with its model loaded from filename.'''
        handle = self.getRawModel()
        handle.model = io.LoadPDB(filename)
        return handle

    def testLoopReconstruction(self):
        '''Check that database loop filling closes all gaps as expected.'''
        handle = self.getRawModel()
        backbone_scorer = modelling.SetupBackboneScorer(handle)
        modelling.FillLoopsByDatabase(
            handle, backbone_scorer, self.frag_db, self.structure_db,
            self.torsion_sampler)
        # no gap may be left and the result must match the stored reference
        self.assertEqual(len(handle.gaps), 0)
        self.compare(handle.model, 'data/1crn_rec.pdb')

    def testBuildSidechains(self):
        '''Check sidechain building starting from the loop reference.'''
        handle = self._getRawModelWithStructure('data/1crn_rec.pdb')
        modelling.BuildSidechains(handle)
        self.compare(handle.model, 'data/1crn_sc.pdb')

    def testMinimizeModelEnergy(self):
        '''Check energy minimization starting from the sidechain reference.'''
        handle = self._getRawModelWithStructure('data/1crn_sc.pdb')
        modelling.MinimizeModelEnergy(handle)
        self.compare(handle.model, 'data/1crn_final.pdb')

    def testBuildFromRawModel(self):
        '''Check that BuildFromRawModel returns a valid model.'''
        # set up a tiny raw model from the 2l4k_B template
        template = io.LoadPDB('data/2l4k_B.pdb.gz')
        alignment = io.LoadAlignment('data/2l4k_B_aln.fasta')
        alignment.AttachView(1, template.Select('peptide=true'))
        handle = modelling.BuildRawModel(alignment)
        # full pipeline, reusing the resources loaded in setUpClass
        built = modelling.BuildFromRawModel(
            handle, self.frag_db, self.structure_db, self.torsion_sampler)
        self.assertTrue(built.IsValid())

    def testAllShort(self):
        '''Ensure the short example pipeline from the documentation works.'''
        handle = self.getRawModel()
        # single call building the final model with default arguments
        built = modelling.BuildFromRawModel(handle)
        self.assertTrue(built.IsValid())
        self.compare(built, 'data/1crn_build.pdb')

    def testAllSteps(self):
        '''Ensure the step-by-step pipeline from the documentation works.'''
        merge_distance = 4
        handle = self.getRawModel()

        # loop modelling: try to close every gap
        backbone_scorer = modelling.SetupBackboneScorer(handle)
        modelling.CloseSmallDeletions(handle, backbone_scorer)
        modelling.RemoveTerminalGaps(handle)
        for dist in range(merge_distance):
            modelling.MergeGapsByDistance(handle, dist)
            modelling.FillLoopsByDatabase(
                handle, backbone_scorer, self.frag_db, self.structure_db,
                self.torsion_sampler)

        # Monte Carlo is the fallback for gaps the database could not fill
        gaps_left = len(handle.gaps)
        if gaps_left > 0:
            modelling.FillLoopsByMonteCarlo(
                handle, backbone_scorer, self.torsion_sampler)

        # sidechains first, then molecular mechanics energy minimization
        modelling.BuildSidechains(handle)
        modelling.MinimizeModelEnergy(handle)

        # the handle now holds the final model
        built = handle.model
        self.assertTrue(built.IsValid())
        self.compare(built, 'data/1crn_build.pdb')

if __name__ == "__main__":
    # executed as a script: hand over to the test runner shipped with ost
    from ost.testutils import RunTests
    RunTests()