diff --git a/core/pymod/pm3argparse.py b/core/pymod/pm3argparse.py index d4c9643f23532260f831e265e98178271792b129..e97776d16e0ee81d05ba2ea137c9604a63ac313d 100644 --- a/core/pymod/pm3argparse.py +++ b/core/pymod/pm3argparse.py @@ -32,6 +32,7 @@ import ost from ost import io, seq from promod3.core import helper +from promod3 import loop, modelling def _TmpForGZip(filename, suffix, msg_prefix): """Unpack a file to a tmp file if gzipped. @@ -259,6 +260,18 @@ def _FetchProfileFromFile(filename): str(exc), 52) return prof +def _FetchPsipredFromFile(filename): + """Load psipred prediction from filename and return it.""" + argstr = filename + ": " + helper.FileExists("Profile", 51, filename) + try: + pred = loop.PsipredPrediction.FromHHM(filename) + except Exception, exc: + helper.MsgErrorAndExit(argstr + ": failure to parse psipred " + + "prediction: " + str(exc), 56) + return pred + + def _GetChains(structures, structure_sources): """Get chain id to entity view (single chain) mapping (dict).""" # IDs: (file_base = base file name with no extensions) @@ -427,6 +440,8 @@ class PM3ArgumentParser(argparse.ArgumentParser): self._AssembleStructure() if 'PROFILE' in self.activate: self._AssembleProfile() + if 'FRAGMENTS' in self.activate: + self._AssembleFragments() def AddAlignment(self, allow_multitemplate=False): """Commandline options for alignments. @@ -603,6 +618,51 @@ class PM3ArgumentParser(argparse.ArgumentParser): """ self.activate.add('PROFILE') + + def AddFragments(self): + """Commandline option for usage of Fragments + + Activate everything needed to setup + :class:`promod3.modelling.FraggerHandle` objects in the argument parser. + Command line arguments are then added in :meth:`AssembleParser` and the + input is post processed and checked in :meth:`Parse`. + + Options/arguments added: + + * ``--use_fragments`` - Boolean flag whether to setup fragger handles. + + Notes: + + * Fragger handles are setup to identify fragments in a + :class:`promod3.loop.StructureDB`. + + * If no profiles are provided as additional argument + (``-s/--seqprof <FILE>``), fragments are identified based on BLOSUM62 + sequence similarity. + + * If you provide profiles that are not in hhm format, fragments are + identified based on BLOSUM62 sequence similarity, sequence profile + scoring and structural profile scoring. + + * If you provide profiles in hhm format (optimal case), psipred + predictions are fetched from there and fragments are identified based + on secondary structure agreement, secondary structure dependent + torsion probabilities, sequence profile scoring and structure + profile scoring. + + Attributes added to the namespace returned by :meth:`Parse`: + + * :attr:`fragger_handles` - :class:`list` of + :class:`promod3.modelling.FraggerHandle`, ordered to match the target + sequences. + + Exit codes related to fragments input: + + * 56 - cannot read psipred prediction from hhm file + """ + self.activate.add('FRAGMENTS') + + def _AssembleAlignment(self): """Actually add alignment arguments/options.""" aln_grp = self.add_mutually_exclusive_group(required=True) @@ -650,6 +710,15 @@ class PM3ArgumentParser(argparse.ArgumentParser): ".hhm, .hhm.gz, .pssm, .pssm.gz", action='append', default=list()) + def _AssembleFragments(self): + self.add_argument('--use_fragments', + help="Use fragments instead of torsion angle "+ + "based sampling for Monte Carlo approaches. "+ + "For optimal performance you should provide "+ + "sequence profiles in hhm format. (File "+ + "extensions: .hhm or .hhm.gz). BUT: be aware of"+ + "increased runtime.", action="store_true") + class PM3OptionsNamespace(object): # class will grow, so for the moment pylint is ignored #pylint: disable=too-few-public-methods @@ -672,6 +741,8 @@ class PM3OptionsNamespace(object): self._AttachViews() if 'PROFILE' in activated: self._PostProcessProfile() + if 'FRAGMENTS' in activated: + self._PostProcessFragments() def _PostProcessAlignment(self): #pylint: disable=no-member @@ -730,11 +801,11 @@ class PM3OptionsNamespace(object): # so not having any profile is fine return - loaded_profiles = list() + self.loaded_profiles = list() for src in self.seqprof: - loaded_profiles.append(_FetchProfileFromFile(src)) + self.loaded_profiles.append(_FetchProfileFromFile(src)) - prof_sequences = [p.sequence for p in loaded_profiles] + prof_sequences = [p.sequence for p in self.loaded_profiles] # check uniqueness of loaded profiles if len(set(prof_sequences)) != len(prof_sequences): @@ -746,7 +817,7 @@ class PM3OptionsNamespace(object): for aln in self.alignments] for s in trg_sequences: try: - self.profiles.append(loaded_profiles[prof_sequences.index(s)]) + self.profiles.append(self.loaded_profiles[prof_sequences.index(s)]) except Exception, exc: helper.MsgErrorAndExit("Could not find profile with sequence " + "that exactly matches trg seq: " + s, 55) @@ -758,6 +829,80 @@ class PM3OptionsNamespace(object): helper.MsgErrorAndExit("Could not map every profile to a target " + "sequence", 53) + def _PostProcessFragments(self): + + self.fragger_handles = list() + + if not self.use_fragments: + # no fragments requested, so lets just return + return + + trg_sequences = [aln.GetSequence(0).GetGaplessString() \ + for aln in self.alignments] + + # we only want to setup a Fragger for every unique target sequence + unique_trg_sequences = list(set(trg_sequences)) + + # already setup variables, fill later if required data is present + profiles = [None] * len(unique_trg_sequences) + psipred_predictions = [None] * len(unique_trg_sequences) + ts_coil = None + ts_helix = None + ts_extended = None + + # a structure db we need anyway. Load once and assign the same to all + # fraggers to avoid memory explosion + structure_db = loop.LoadStructureDB() + + # load the profiles + if hasattr(self, "profiles") and len(self.profiles) > 0: + profile_dict = dict() + for p in self.loaded_profiles: + profile_dict[p.sequence] = p + # as we already mapped the profiles in _PostProcessProfiles, + # the following is guaranteed to find the right profile + # for every unique target sequence + for s_idx, s in enumerate(unique_trg_sequences): + profiles[s_idx] = profile_dict[s] + + # For the psipred predictions we have to go back to the + # input files. If they all end with .hhm or hhm.gz we're ready to go + file_endings_ok = True + for src in self.seqprof: + if not (src.endswith(".hhm") or src.endswith(".hhm.gz")): + file_endings_ok = False + break + + if file_endings_ok: + # lets load the torsion samplers now as they are only required + # if we also add psipred handlers + ts_coil = loop.LoadTorsionSamplerCoil() + ts_extended = loop.LoadTorsionSamplerExtended() + ts_helix = loop.LoadTorsionSamplerHelical() + + # to get the right filenames we use the sequences of the + # loaded profiles that are in the same order as self.seqprof + profile_sequences = [p.sequence for p in self.loaded_profiles] + for s_idx, s in enumerate(unique_trg_sequences): + fn = self.seqprof[profile_sequences.index(s)] + psipred_predictions[s_idx] = _FetchPsipredFromFile(fn) + + # setup one fragger handle for each unique sequence + fraggers = list() + for i in range(len(unique_trg_sequences)): + fraggers.append(modelling.FraggerHandle(unique_trg_sequences[i], + profile = profiles[i], + psipred_pred = psipred_predictions[i], + rmsd_thresh = 0.02, + structure_db = structure_db, + torsion_sampler_coil = ts_coil, + torsion_sampler_helix = ts_helix, + torsion_sampler_extended = ts_extended)) + # map them to the chains + for s in trg_sequences: + self.fragger_handles.append(fraggers[unique_trg_sequences.index(s)]) + + # LocalWords: param attr prog argparse ArgumentParser bool sys os init str # LocalWords: progattr descattr argpinit argv formatter meth args namespace # LocalWords: ArgumentDefaultsHelpFormatter sysargv AssembleParser fasta io diff --git a/core/tests/test_pm3argparse.py b/core/tests/test_pm3argparse.py index a9626bf498adda61f58388636cfbdbd74fd08430..878008b5ffa97b15acc81dd7e18cdc62673b1e3a 100644 --- a/core/tests/test_pm3argparse.py +++ b/core/tests/test_pm3argparse.py @@ -1225,6 +1225,71 @@ class PM3ArgParseTests(unittest.TestCase): opts.profiles[1].sequence) + def testFraggerNotRequested(self): + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AddFragments() + parser.AssembleParser() + opts = parser.Parse(['-f', 'data/aln_tpl/1crn.fasta', + '-f', 'data/aln_tpl/5ua4_B.fasta']) + self.assertEqual(len(opts.alignments), 2) + self.assertEqual(len(opts.fragger_handles), 0) + + def testFraggerAttachedWithoutProfile(self): + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AddFragments() + parser.AssembleParser() + opts = parser.Parse(['-f', 'data/aln_tpl/1crn.fasta', + '-f', 'data/aln_tpl/5ua4_B.fasta', + '--use_fragments']) + self.assertEqual(len(opts.alignments), 2) + self.assertEqual(len(opts.fragger_handles), 2) + self.assertEqual(opts.alignments[0].GetSequence(0).GetGaplessString(), + opts.fragger_handles[0].sequence) + self.assertEqual(opts.alignments[1].GetSequence(0).GetGaplessString(), + opts.fragger_handles[1].sequence) + # most of the stuff in the fragger handles should be None + self.assertIsNone(opts.fragger_handles[0].profile) + self.assertIsNone(opts.fragger_handles[0].psipred_pred) + self.assertIsNone(opts.fragger_handles[0].torsion_sampler_coil) + self.assertIsNone(opts.fragger_handles[0].torsion_sampler_helix) + self.assertIsNone(opts.fragger_handles[0].torsion_sampler_extended) + self.assertIsNone(opts.fragger_handles[1].profile) + self.assertIsNone(opts.fragger_handles[1].psipred_pred) + self.assertIsNone(opts.fragger_handles[1].torsion_sampler_coil) + self.assertIsNone(opts.fragger_handles[1].torsion_sampler_helix) + self.assertIsNone(opts.fragger_handles[1].torsion_sampler_extended) + + def testFraggerAttachedWithProfile(self): + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AddProfile() + parser.AddFragments() + parser.AssembleParser() + opts = parser.Parse(['-f', 'data/aln_tpl/1crn.fasta', + '-f', 'data/aln_tpl/5ua4_B.fasta', + '-s', 'data/aln_tpl/1crn.hhm', + '-s', 'data/aln_tpl/5ua4_B.hhm', + '--use_fragments']) + self.assertEqual(len(opts.alignments), 2) + self.assertEqual(len(opts.fragger_handles), 2) + self.assertEqual(opts.alignments[0].GetSequence(0).GetGaplessString(), + opts.fragger_handles[0].sequence) + self.assertEqual(opts.alignments[1].GetSequence(0).GetGaplessString(), + opts.fragger_handles[1].sequence) + # most of the stuff in the fragger handles should be None + self.assertIsNotNone(opts.fragger_handles[0].profile) + self.assertIsNotNone(opts.fragger_handles[0].psipred_pred) + self.assertIsNotNone(opts.fragger_handles[0].torsion_sampler_coil) + self.assertIsNotNone(opts.fragger_handles[0].torsion_sampler_helix) + self.assertIsNotNone(opts.fragger_handles[0].torsion_sampler_extended) + self.assertIsNotNone(opts.fragger_handles[1].profile) + self.assertIsNotNone(opts.fragger_handles[1].psipred_pred) + self.assertIsNotNone(opts.fragger_handles[1].torsion_sampler_coil) + self.assertIsNotNone(opts.fragger_handles[1].torsion_sampler_helix) + self.assertIsNotNone(opts.fragger_handles[1].torsion_sampler_extended) + # test options: --disable-aln check (for amino acids) # test options: --disable-input-checks (for all) # test option: --disable-mm-check (macromolecule)