diff --git a/.gitignore b/.gitignore index 1a16224df6a7b2a3adc436548674fa7665c4471b..f11a3d572226b6953225d0b4229363abfb251c8b 100644 --- a/.gitignore +++ b/.gitignore @@ -74,3 +74,4 @@ modules/gui/src/dngr.qrc.depends /compile_commands.json /modules/bindings/tests/formatdb.log /modules/mol/mm/src/settings.hh +*.img diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 07ec9add92440140d08db07aaf2e9fc1c12abec7..00bd9b3e2765afaf9c8de816bfbbc5882b8c2eca 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -11,6 +11,7 @@ Changes in Release <RELEASE NUMBER> * Added Molck API to the ost.mol.alg module. * Extended lDDT API in ost.mol.alg module to reproduce functionality of lddt binary. + * Added `actions` interface including one action to compare structures. Changes in Release 1.7.1 -------------------------------------------------------------------------------- @@ -183,4 +184,3 @@ Changes in Release 1.2.0 (since 1.1.0) to lDDT * new superposition dialog in DNG - diff --git a/CMakeLists.txt b/CMakeLists.txt index 2cf4c283a4cbfd59833a4d76837da48948dbe9f1..4327ea12f5523885cc462554bce4c7efcb93eab0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -330,6 +330,7 @@ endif() add_subdirectory(modules) add_subdirectory(scripts) add_subdirectory(tools) +add_subdirectory(actions) # deployment has to come last, to ensure that all install commands are run before deployment # magic is done add_subdirectory(deployment) diff --git a/actions/CMakeLists.txt b/actions/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..ce6e23204e1a1b3b08332c428e78d29384c48808 --- /dev/null +++ b/actions/CMakeLists.txt @@ -0,0 +1,4 @@ +add_custom_target(actions ALL) + +ost_action_init() +ost_action(ost-compare-structures actions) diff --git a/actions/ost-compare-structures b/actions/ost-compare-structures new file mode 100644 index 0000000000000000000000000000000000000000..0da8fe4a2c491f49301cd64888dad01412516d0c --- /dev/null +++ b/actions/ost-compare-structures @@ -0,0 +1,916 @@ +"""Evaluate model structure against reference. + +eg. + + ost compare-structures \\ + --model <MODEL> \\ + --reference <REF> \\ + --output output.json \\ + --lddt \\ + --structural-checks \\ + --consistency-checks \\ + --molck \\ + --remove oxt hyd \\ + --map-nonstandard-residues + +If desired one can recreate what CAMEO is calculating. CAMEO calls lddt binary +as follows: + + lddt \\ + -p <PARAMETER FILE> \\ + -f \\ + -a 15 \\ + -b 15 \\ + -r 15 \\ + <MODEL> \\ + <REF> + +Only model structures are "Molck-ed" in CAMEO. The call to molck is as follows: + + molck \\ + --complib=<COMPOUND LIB> \\ + --rm=hyd,oxt,unk \\ + --fix-ele \\ + --map-nonstd <FILEPATH> \\ + --out=<OUTPUT> + +To be as much compatible with with CAMEO as possible one should call +compare-structures as follows: + + ost compare-structures \\ + # General parameters + #################### + --model <MODEL> \\ + --reference <REF> \\ + --output output.json \\ + # QS-score parameters + ##################### + --qs-score \\ + --residue-number-alignment \\ + # lDDT parameters + ################# + --lddt \\ + --inclusion-radius 15.0 \\ + # Molecular check parameters + ############################ + --molck \\ + --remove oxt hyd unk \\ + --clean-element-column \\ + --map-nonstandard-residues \\ + # Additional checks + ################### + --structural-checks \\ + --bond-tolerance 15.0 \\ + --angle-tolerance 15.0 \\ + --consistency-checks +""" + +import os +import sys +import json +import argparse + +import ost +from ost.io import (LoadPDB, LoadMMCIF, SavePDB, MMCifInfoBioUnit, MMCifInfo, + MMCifInfoTransOp, ReadStereoChemicalPropsFile, profiles) +from ost import PushVerbosityLevel +from ost.mol.alg import (qsscoring, Molck, MolckSettings, lDDTSettings, + CheckStructure, ResidueNamesMatch) +from ost.conop import (CompoundLib, SetDefaultLib, GetDefaultLib, + RuleBasedProcessor) +from ost.seq.alg.renumber import Renumber + + +def _GetDefaultShareFilePath(filename): + """Look for filename in working directory and OST shared data path. + :return: Path to valid file or None if not found. + """ + # Try current directory + cwd = os.path.abspath(os.getcwd()) + file_path = os.path.join(cwd, filename) + if not os.path.isfile(file_path): + try: + file_path = os.path.join(ost.GetSharedDataPath(), filename) + except RuntimeError: + # Ignore errors here (caught later together with non-existing file) + pass + if not os.path.isfile(file_path): + file_path = None + # Either file_path is valid file path or None + return file_path + +def _GetDefaultParameterFilePath(): + # Try to get in default locations + parameter_file_path = _GetDefaultShareFilePath("stereo_chemical_props.txt") + if parameter_file_path is None: + msg = ( + "Could not set default stereochemical parameter file. In " + "order to use the default one please set $OST_ROOT " + "environmental variable, run the script with OST binary or" + " provide a local copy of 'stereo_chemical_props.txt' in " + "CWD. Alternatively provide the path to the local copy.") + else: + msg = "" + return parameter_file_path, msg + +def _GetDefaultCompoundLibraryPath(): + # Try to get in default locations + compound_library_path = _GetDefaultShareFilePath("compounds.chemlib") + if compound_library_path is None: + msg = ( + "Could not set default compounds library path. In " + "order to use the default one please set $OST_ROOT " + "environmental variable, run the script with OST binary or" + " provide a local copy of 'compounds.chemlib' in CWD" + ". Alternatively provide the path to the local copy.") + else: + msg = "" + return compound_library_path, msg + +def _ParseArgs(): + """Parse command-line arguments.""" + # + # General options + # + parser = argparse.ArgumentParser( + formatter_class=argparse.RawTextHelpFormatter, + description=__doc__, + prog="ost compare-structures") + + parser.add_argument( + '-v', + '--verbosity', + type=int, + default=3, + help="Set verbosity level.") + parser.add_argument( + "-m", + "--model", + dest="model", + required=True, + help=("Path to the model file.")) + parser.add_argument( + "-r", + "--reference", + dest="reference", + required=True, + help=("Path to the reference file.")) + parser.add_argument( + "-o", + "--output", + dest="output", + help=("Output file name. The output will be saved as a JSON file.")) + parser.add_argument( + "-d", + "--dump-structures", + dest="dump_structures", + default=False, + action="store_true", + help=("Dump cleaned structures used to calculate all the scores as\n" + "PDB files using specified suffix. Files will be dumped to the\n" + "same location as original files.")) + parser.add_argument( + "-ds", + "--dump-suffix", + dest="dump_suffix", + default=".compare.structures.pdb", + help=("Use this suffix to dump structures.\n" + "Defaults to .compare.structures.pdb.")) + parser.add_argument( + "-rs", + "--reference-selection", + dest="reference_selection", + default="", + help=("Selection performed on reference structures.")) + parser.add_argument( + "-ms", + "--model-selection", + dest="model_selection", + default="", + help=("Selection performed on model structures.")) + parser.add_argument( + "-ca", + "--c-alpha-only", + dest="c_alpha_only", + default=False, + action="store_true", + help=("Use C-alpha atoms only. Equivalent of calling the action with\n" + "'--model-selection=\"aname=CA\" " + "--reference-selection=\"aname=CA\"'\noptions.")) + parser.add_argument( + "-ft", + "--fault-tolerant", + dest="fault_tolerant", + default=False, + action="store_true", + help=("Fault tolerant parsing.")) + parser.add_argument( + "-cl", + "--compound-library", + dest="compound_library", + default=None, + help=("Location of the compound library file (compounds.chemlib).\n" + "If not provided, the following locations are searched in this\n" + "order: 1. Working directory, 2. OpenStructure standard library" + "\nlocation.")) + # + # QS-scorer options + # + parser.add_argument( + "-qs", + "--qs-score", + dest="qs_score", + default=False, + action="store_true", + help=("Calculate QS-score.")) + parser.add_argument( + "-c", + "--chain-mapping", + nargs="+", + type=lambda x: x.split(":"), + dest="chain_mapping", + help=("Mapping of chains between the reference and the model.\n" + "Each separate mapping consist of key:value pairs where key\n" + "is the chain name in reference and value is the chain name in\n" + "model.")) + parser.add_argument( + "-rna", + "--residue-number-alignment", + dest="residue_number_alignment", + default=False, + action="store_true", + help=("Make alignment based on residue number instead of using\n" + "a global BLOSUM62-based alignment.")) + # + # lDDT options + # + parser.add_argument( + "-l", + "--lddt", + dest="lddt", + default=False, + action="store_true", + help=("Calculate lDDT.")) + parser.add_argument( + "-ir", + "--inclusion-radius", + dest="inclusion_radius", + type=float, + default=15.0, + help=("Distance inclusion radius.")) + parser.add_argument( + "-ss", + "--sequence-separation", + dest="sequence_separation", + type=int, + default=0, + help=("Sequence separation. Only distances between residues whose\n" + "separation is higher than the provided parameter are\n" + "considered when computing the score")) + parser.add_argument( + "-spr", + "--save-per-residue-scores", + dest="save_per_residue_scores", + default=False, + action="store_true", + help=("")) + # + # Molecular check parameters + # + parser.add_argument( + "-ml", + "--molck", + dest="molck", + default=False, + action="store_true", + help=("Run molecular checker to clean up input.")) + parser.add_argument( + "-rm", + "--remove", + dest="remove", + nargs="+", # *, +, ?, N + required=False, + default=["hyd"], + help=("Remove atoms and residues matching some criteria:\n" + " * zeroocc - Remove atoms with zero occupancy\n" + " * hyd - remove hydrogen atoms\n" + " * oxt - remove terminal oxygens\n" + " * nonstd - remove all residues not one of the 20\n" + " * standard amino acids\n" + " * unk - Remove unknown and atoms not following the" + "nomenclature")) + parser.add_argument( + "-ce", + "--clean-element-column", + dest="clean_element_column", + default=False, + action="store_true", + help=("Clean up element column")) + parser.add_argument( + "-mn", + "--map-nonstandard-residues", + dest="map_nonstandard_residues", + default=False, + action="store_true", + help=("Map modified residues back to the parent amino acid, for\n" + "example MSE -> MET, SEP -> SER.")) + # + # Options for various checks + # + parser.add_argument( + "-sc", + "--structural-checks", + dest="structural_checks", + default=False, + action="store_true", + help=("Perform structural checks and filter input data.")) + parser.add_argument( + "-p", + "--parameter-file", + dest="parameter_file", + default=None, + help=("Location of the stereochemical parameter file\n" + "(stereo_chemical_props.txt).\n" + "If not provided, the following locations are searched in this\n" + "order: 1. Working directory, 2. OpenStructure standard library" + "\nlocation.")) + parser.add_argument( + "-bt", + "--bond-tolerance", + dest="bond_tolerance", + type=float, + default=12.0, + help=("Tolerance in STD for bonds.")) + parser.add_argument( + "-at", + "--angle-tolerance", + dest="angle_tolerance", + type=float, + default=12.0, + help=("Tolerance in STD for angles.")) + parser.add_argument( + "-cc", + "--consistency-checks", + dest="consistency_checks", + default=False, + action="store_true", + help=("Take consistency checks into account. By default residue name\n" + "consistency between a model-reference pair would be checked\n" + "but only a warning message will be displayed and the script\n" + "will continue to calculate scores. If this flag is ON, checks\n" + "will not be ignored and if the pair does not pass the test\n" + "all the scores for that pair will be marked as a FAILURE.")) + + # Print full help is no arguments provided + if len(sys.argv) == 1: + parser.print_help(sys.stderr) + sys.exit(1) + + opts = parser.parse_args() + # Set chain mapping + if opts.chain_mapping is not None: + try: + opts.chain_mapping = dict(opts.chain_mapping) + except ValueError: + parser.error( + "Cannot parse chain mapping into dictionary. The " + "correct format is: key:value [key2:value2 ...].") + + # Check parameter file if structural checks are on + if opts.structural_checks: + if opts.parameter_file is None: + # try to get default if none provided + opts.parameter_file, msg = _GetDefaultParameterFilePath() + if msg: + parser.error(msg) + else: + # if provided it must exist + if not os.path.isfile(opts.parameter_file): + parser.error("Parameter file %s does not exist." \ + % opts.parameter_file) + + # Check compound library path (always required!) + if opts.compound_library is None: + # try to get default if none provided + opts.compound_library, msg = _GetDefaultCompoundLibraryPath() + if msg: + parser.error(msg) + else: + # if provided it must exist + if not os.path.isfile(opts.compound_library): + parser.error("Compounds library file %s does not exist." \ + % opts.compound_library) + + # Check model and reference paths + if not os.path.isfile(opts.model): + parser.error("Model file %s does not exist." % opts.model) + if not os.path.isfile(opts.reference): + parser.error("Reference file %s does not exist." % opts.reference) + + return opts + + +def _SetCompoundsChemlib(path_to_chemlib): + """Set default compound library for OST.""" + # NOTE: This is adapted from ProMod3 code and should in the future be doable + # with some shared OST code! + compound_lib = CompoundLib.Load(path_to_chemlib) + SetDefaultLib(compound_lib) + processor = RuleBasedProcessor(compound_lib) + for profile_name in profiles: + profiles[profile_name].processor = processor.Copy() + + +def _RevertChainNames(ent): + """Revert chain names to original names. + + By default the first chain with given name will not have any number + attached to it ie. if there are two chains mapping to chain A the resulting + chain names will be: A and A2. + """ + editor = ent.EditXCS() + suffix = "_tmp" # just a suffix for temporary chain name + separator = "" # dot causes selection error + used_names = dict() + reverted_chains = dict() + for chain in ent.chains: + try: + original_name = chain.GetStringProp("original_name") + except Exception as ex: + ost.LogError("Cannot revert chain %s back to original: %s" % ( + chain.name, + str(ex))) + reverted_chains[chain.name] = chain.name + editor.RenameChain(chain, chain.name + suffix) + continue + new_name = original_name + if new_name not in used_names: + used_names[original_name] = 2 + reverted_chains[chain.name] = new_name + editor.RenameChain(chain, chain.name + suffix) + else: + new_name = "%s%s%i" % (original_name, + separator, + used_names[original_name]) + reverted_chains[chain.name] = new_name + editor.RenameChain(chain, chain.name + suffix) + used_names[original_name] += 1 + for chain in ent.chains: + editor.RenameChain(chain, reverted_chains[chain.name[:-len(suffix)]]) + rev_out = ["%s -> %s" % (on, nn) for on, nn in reverted_chains.iteritems()] + ost.LogInfo("Reverted chains: %s" % ", ".join(rev_out)) + + +def _CheckConsistency(alignments, log_error): + is_cons = True + for alignment in alignments: + ref_chain = Renumber(alignment.GetSequence(0)).CreateFullView() + mdl_chain = Renumber(alignment.GetSequence(1)).CreateFullView() + new_is_cons = ResidueNamesMatch(ref_chain, mdl_chain, log_error) + is_cons = is_cons and new_is_cons + return is_cons + + +def _GetAlignmentsAsFasta(alignments): + """Get the alignments as FASTA formated string. + + :param alignments: Alignments + :type alignments: list of AlignmentHandle + :returns: list of alignments in FASTA format + :rtype: list of strings + """ + strings = list() + for alignment in alignments: + aln_str = ">reference:%s\n%s\n>model:%s\n%s" % ( + alignment.GetSequence(0).name, + alignment.GetSequence(0).GetString(), + alignment.GetSequence(1).name, + alignment.GetSequence(1).GetString()) + strings.append(aln_str) + return strings + + +def _ReadStructureFile(path, c_alpha_only=False, fault_tolerant=False, + selection=""): + """Safely read structure file into OST entity. + + The functin can read both PDB and mmCIF files. + + :param path: Path to the file. + :type path: :class:`str` + :returns: Entity + :rtype: :class:`~ost.mol.EntityHandle` + """ + + def _Select(entity): + selection_message = "Selecting %s" % selection + if selection: + ost.LogInfo(selection_message) + entity = entity.Select(selection) + return entity + + entities = list() + if not os.path.isfile(path): + raise IOError("%s is not a file" % path) + try: + entity = LoadPDB( + path, + fault_tolerant=fault_tolerant, + calpha_only=c_alpha_only) + if not entity.IsValid(): + raise IOError("Provided file does not contain valid entity.") + entity.SetName(os.path.basename(path)) + entity = _Select(entity) + entities.append(entity) + except Exception: + try: + tmp_entity, cif_info = LoadMMCIF( + path, + info=True, + fault_tolerant=fault_tolerant, + calpha_only=c_alpha_only) + if len(cif_info.biounits) == 0: + tbu = MMCifInfoBioUnit() + tbu.id = 'ASU of ' + entity.pdb_id + tbu.details = 'asymmetric unit' + for chain in tmp_entity.chains: + tbu.AddChain(str(chain)) + tinfo = MMCifInfo() + tops = MMCifInfoTransOp() + tinfo.AddOperation(tops) + tbu.AddOperations(tinfo.GetOperations()) + entity = tbu.PDBize(tmp_entity, min_polymer_size=0) + entity.SetName(os.path.basename(path) + ".au") + _RevertChainNames(entity) + entity = _Select(entity) + entities.append(entity) + elif len(cif_info.biounits) > 1: + for i, biounit in enumerate(cif_info.biounits, 1): + entity = biounit.PDBize(tmp_entity, min_polymer_size=0) + if not entity.IsValid(): + raise IOError( + "Provided file does not contain valid entity.") + entity.SetName(os.path.basename(path) + "." + str(i)) + _RevertChainNames(entity) + entity = _Select(entity) + entities.append(entity) + else: + biounit = cif_info.biounits[0] + entity = biounit.PDBize(tmp_entity, min_polymer_size=0) + if not entity.IsValid(): + raise IOError( + "Provided file does not contain valid entity.") + entity.SetName(os.path.basename(path)) + _RevertChainNames(entity) + entity = _Select(entity) + entities.append(entity) + + except Exception as exc: + raise exc + return entities + + +def _MolckEntity(entity, options): + """Molck the entity.""" + lib = GetDefaultLib() + to_remove = tuple(options.remove) + + ms = MolckSettings(rm_unk_atoms="unk" in to_remove, + rm_non_std="nonstd" in to_remove, + rm_hyd_atoms="hyd" in to_remove, + rm_oxt_atoms="oxt" in to_remove, + rm_zero_occ_atoms="zeroocc" in to_remove, + colored=False, + map_nonstd_res=options.map_nonstandard_residues, + assign_elem=options.clean_element_column) + Molck(entity, lib, ms) + + +def _Main(): + """Do the magic.""" + # + # Setup + opts = _ParseArgs() + PushVerbosityLevel(opts.verbosity) + _SetCompoundsChemlib(opts.compound_library) + # + # Read the input files + ost.LogInfo("#" * 80) + ost.LogInfo("Reading input files (fault_tolerant=%s)" % + str(opts.fault_tolerant)) + ost.LogInfo(" --> reading model from %s" % opts.model) + models = _ReadStructureFile( + opts.model, + c_alpha_only=opts.c_alpha_only, + fault_tolerant=opts.fault_tolerant, + selection=opts.model_selection) + ost.LogInfo(" --> reading reference from %s" % opts.reference) + references = _ReadStructureFile( + opts.reference, + c_alpha_only=opts.c_alpha_only, + fault_tolerant=opts.fault_tolerant, + selection=opts.reference_selection) + # molcking + if opts.molck: + ost.LogInfo("#" * 80) + ost.LogInfo("Cleaning up input with Molck") + for reference in references: + _MolckEntity(reference, opts) + for model in models: + _MolckEntity(model, opts) + # restrict to peptides (needed for CheckStructure anyways) + for i in range(len(references)): + references[i] = references[i].Select("peptide=true") + for i in range(len(models)): + models[i] = models[i].Select("peptide=true") + # structure checking + if opts.structural_checks: + ost.LogInfo("#" * 80) + ost.LogInfo("Performing structural checks") + stereochemical_parameters = ReadStereoChemicalPropsFile( + opts.parameter_file) + ost.LogInfo(" --> for reference(s)") + for reference in references: + ost.LogInfo("Checking %s" % reference.GetName()) + CheckStructure(reference, + stereochemical_parameters.bond_table, + stereochemical_parameters.angle_table, + stereochemical_parameters.nonbonded_table, + opts.bond_tolerance, + opts.angle_tolerance) + ost.LogInfo(" --> for model(s)") + for model in models: + ost.LogInfo("Checking %s" % model.GetName()) + CheckStructure(model, + stereochemical_parameters.bond_table, + stereochemical_parameters.angle_table, + stereochemical_parameters.nonbonded_table, + opts.bond_tolerance, + opts.angle_tolerance) + if len(models) > 1 or len(references) > 1: + ost.LogInfo("#" * 80) + ost.LogInfo( + "Multiple complexes mode ON. All combinations will be tried.") + + result = { + "result": {}, + "options": vars(opts)} + result["options"]["cwd"] = os.path.abspath(os.getcwd()) + # + # Perform scoring + skipped = list() + for model in models: + model_name = model.GetName() + model_results = dict() + for reference in references: + reference_name = reference.GetName() + reference_results = { + "info": dict()} + ost.LogInfo("#" * 80) + ost.LogInfo("Comparing %s to %s" % ( + model_name, + reference_name)) + qs_scorer = qsscoring.QSscorer(reference, + model, + opts.residue_number_alignment) + if opts.chain_mapping is not None: + ost.LogInfo( + "Using custom chain mapping: %s" % str( + opts.chain_mapping)) + qs_scorer.chain_mapping = opts.chain_mapping + else: + qs_scorer.chain_mapping # just to initialize it + ost.LogInfo("-" * 80) + ost.LogInfo("Checking consistency between %s and %s" % ( + model_name, reference_name)) + is_cons = _CheckConsistency( + qs_scorer.alignments, + opts.consistency_checks) + reference_results["info"]["residue_names_consistent"] = is_cons + reference_results["info"]["mapping"] = { + "chain_mapping": qs_scorer.chain_mapping, + "alignments": _GetAlignmentsAsFasta(qs_scorer.alignments)} + skip_score = False + if opts.consistency_checks: + if not is_cons: + msg = (("Residue names in model %s and in reference " + "%s are inconsistent.") % ( + model_name, + reference_name)) + ost.LogError(msg) + skip_score = True + skipped.append(skip_score) + else: + ost.LogInfo("Consistency check: OK") + skipped.append(False) + else: + skipped.append(False) + if not is_cons: + msg = (("Residue names in model %s and in reference " + "%s are inconsistent.\nThis might lead to " + "corrupted results.") % ( + model_name, + reference_name)) + ost.LogWarning(msg) + else: + ost.LogInfo("Consistency check: OK") + if opts.qs_score: + ost.LogInfo("-" * 80) + if skip_score: + ost.LogInfo( + "Skipping QS-score because consistency check failed") + reference_results["qs_score"] = { + "status": "FAILURE", + "error": "Consistency check failed.", + "model_name": model_name, + "reference_name": reference_name, + "global_score": 0.0, + "best_score": 0.0} + else: + ost.LogInfo("Computing QS-score") + try: + reference_results["qs_score"] = { + "status": "SUCCESS", + "error": "", + "global_score": qs_scorer.global_score, + "best_score": qs_scorer.best_score} + except qsscoring.QSscoreError as ex: + # default handling: report failure and set score to 0 + ost.LogError('QSscore failed:', str(ex)) + reference_results["qs_score"] = { + "status": "FAILURE", + "error": str(ex), + "global_score": 0.0, + "best_score": 0.0} + # Calculate lDDT + if opts.lddt: + ost.LogInfo("-" * 80) + ost.LogInfo("Computing lDDT scores") + lddt_results = { + "single_chain_lddt": list() + } + lddt_settings = lDDTSettings( + radius=opts.inclusion_radius, + sequence_separation=opts.sequence_separation, + label="lddt") + ost.LogInfo("lDDT settings: ") + ost.LogInfo(str(lddt_settings).rstrip()) + ost.LogInfo("===") + oligo_lddt_scorer = qs_scorer.GetOligoLDDTScorer(lddt_settings) + for mapped_lddt_scorer in oligo_lddt_scorer.mapped_lddt_scorers: + # Get data + lddt_scorer = mapped_lddt_scorer.lddt_scorer + model_chain = mapped_lddt_scorer.model_chain_name + reference_chain = mapped_lddt_scorer.reference_chain_name + if skip_score: + ost.LogInfo( + " --> Skipping single chain lDDT because " + "consistency check failed") + lddt_results["single_chain_lddt"].append({ + "status": "FAILURE", + "error": "Consistency check failed.", + "model_chain": model_chain, + "reference_chain": reference_chain, + "global_score": 0.0, + "conserved_contacts": 0.0, + "total_contacts": 0.0}) + else: + try: + ost.LogInfo((" --> Computing lDDT between model " + "chain %s and reference chain %s") % ( + model_chain, + reference_chain)) + ost.LogInfo("Global LDDT score: %.4f" % + lddt_scorer.global_score) + ost.LogInfo( + "(%i conserved distances out of %i checked, over " + "%i thresholds)" % (lddt_scorer.conserved_contacts, + lddt_scorer.total_contacts, + len(lddt_settings.cutoffs))) + sc_lddt_scores = { + "status": "SUCCESS", + "error": "", + "model_chain": model_chain, + "reference_chain": reference_chain, + "global_score": lddt_scorer.global_score, + "conserved_contacts": + lddt_scorer.conserved_contacts, + "total_contacts": lddt_scorer.total_contacts} + if opts.save_per_residue_scores: + per_residue_sc = \ + mapped_lddt_scorer.GetPerResidueScores() + ost.LogInfo("Per residue local lDDT (reference):") + ost.LogInfo("Chain\tResidue Number\tResidue Name" + "\tlDDT\tConserved Contacts\tTotal " + "Contacts") + for prs_scores in per_residue_sc: + ost.LogInfo("%s\t%i\t%s\t%.4f\t%i\t%i" % ( + reference_chain, + prs_scores["residue_number"], + prs_scores["residue_name"], + prs_scores["lddt"], + prs_scores["conserved_contacts"], + prs_scores["total_contacts"])) + sc_lddt_scores["per_residue_scores"] = \ + per_residue_sc + lddt_results["single_chain_lddt"].append( + sc_lddt_scores) + except Exception as ex: + ost.LogError('Single chain lDDT failed:', str(ex)) + lddt_results["single_chain_lddt"].append({ + "status": "FAILURE", + "error": str(ex), + "model_chain": model_chain, + "reference_chain": reference_chain, + "global_score": 0.0, + "conserved_contacts": 0.0, + "total_contacts": 0.0}) + # perform oligo lddt scoring + if skip_score: + ost.LogInfo( + " --> Skipping oligomeric lDDT because consistency " + "check failed") + lddt_results["oligo_lddt"] = { + "status": "FAILURE", + "error": "Consistency check failed.", + "global_score": 0.0} + else: + try: + ost.LogInfo(' --> Computing oligomeric lDDT score') + lddt_results["oligo_lddt"] = { + "status": "SUCCESS", + "error": "", + "global_score": oligo_lddt_scorer.oligo_lddt} + ost.LogInfo( + "Oligo lDDT score: %.4f" % + oligo_lddt_scorer.oligo_lddt) + except Exception as ex: + ost.LogError('Oligo lDDT failed:', str(ex)) + lddt_results["oligo_lddt"] = { + "status": "FAILURE", + "error": str(ex), + "global_score": 0.0} + if skip_score: + ost.LogInfo( + " --> Skipping weighted lDDT because consistency " + "check failed") + lddt_results["weighted_lddt"] = { + "status": "FAILURE", + "error": "Consistency check failed.", + "global_score": 0.0} + else: + try: + ost.LogInfo(' --> Computing weighted lDDT score') + lddt_results["weighted_lddt"] = { + "status": "SUCCESS", + "error": "", + "global_score": oligo_lddt_scorer.weighted_lddt} + ost.LogInfo( + "Weighted lDDT score: %.4f" % + oligo_lddt_scorer.weighted_lddt) + except Exception as ex: + ost.LogError('Weighted lDDT failed:', str(ex)) + lddt_results["weighted_lddt"] = { + "status": "FAILURE", + "error": str(ex), + "global_score": 0.0} + reference_results["lddt"] = lddt_results + model_results[reference_name] = reference_results + if opts.dump_structures: + ost.LogInfo("-" * 80) + ref_output_path = os.path.join( + os.path.dirname(opts.reference), + reference_name + opts.dump_suffix) + ost.LogInfo("Saving cleaned up reference to %s" % + ref_output_path) + try: + SavePDB(qs_scorer.qs_ent_1.ent, + ref_output_path) + except Exception as ex: + ost.LogError("Cannot save reference: %s" % str(ex)) + mdl_output_path = os.path.join( + os.path.dirname(opts.model), + model_name + opts.dump_suffix) + ost.LogInfo("Saving cleaned up model to %s" % + mdl_output_path) + try: + SavePDB(qs_scorer.qs_ent_2.ent, + mdl_output_path) + except Exception as ex: + ost.LogError("Cannot save model: %s" % str(ex)) + result["result"][model_name] = model_results + + if all(skipped) and len(skipped) > 0: + ost.LogError("Consistency check failed for all model-reference pairs.") + if opts.output is not None: + ost.LogInfo("#" * 80) + ost.LogInfo("Saving output into %s" % opts.output) + with open(opts.output, "w") as outfile: + outfile.write(json.dumps(result, indent=4)) + + +if __name__ == '__main__': + # make script 'hot' + unbuffered = os.fdopen(sys.stdout.fileno(), 'w', 0) + sys.stdout = unbuffered + _Main() diff --git a/cmake_support/OST.cmake b/cmake_support/OST.cmake index 8e4e243432396a870b007da628e04713ac021c6f..4364b0c9407b4f82c3cd1cbb41f69cb09c217a73 100644 --- a/cmake_support/OST.cmake +++ b/cmake_support/OST.cmake @@ -929,3 +929,49 @@ macro(setup_boost) set(BOOST_THREAD ${Boost_LIBRARIES}) set(Boost_LIBRARIES) endmacro() + + +#------------------------------------------------------------------------------- +# Synopsis: +# ost_action_init() +# +# Description: +# Initialise cached variables +#------------------------------------------------------------------------------- +macro(ost_action_init) + set(OST_ACTION_NAMES "" CACHE INTERNAL "" FORCE) +endmacro(ost_action_init) + +#------------------------------------------------------------------------------- +# Synopsis: +# ost_action(ACTION TARGET) +# +# Description: +# Add a script to actions. +# ACTION script to be added (needs to have permissions to be executed) +# TARGET make target to add the action to +#------------------------------------------------------------------------------- +macro(ost_action ACTION TARGET) + copy_if_different("${CMAKE_CURRENT_SOURCE_DIR}" + "${STAGE_DIR}/${LIBEXEC_PATH}" + "${ACTION}" "TARGETS" ${TARGET}) + install(FILES "${ACTION}" DESTINATION "${LIBEXEC_PATH}" + PERMISSIONS WORLD_EXECUTE GROUP_EXECUTE OWNER_EXECUTE + WORLD_READ GROUP_READ OWNER_READ) + # storing tool names for bash completion + string(REGEX REPLACE "^ost-" "" stripped_action ${ACTION}) + if(DEFINED OST_ACTION_NAMES) + if(${OST_ACTION_NAMES} MATCHES "${stripped_action}") + set(_ACTION_NAMES "${OST_ACTION_NAMES}") + else() + if("${OST_ACTION_NAMES}" STREQUAL "") + set(_ACTION_NAMES "${stripped_action}") + else() + set(_ACTION_NAMES "${OST_ACTION_NAMES} ${stripped_action}") + endif() + endif() + else() + set(_ACTION_NAMES "${stripped_action}") + endif() + set(OST_ACTION_NAMES "${_ACTION_NAMES}" CACHE INTERNAL "" FORCE) +endmacro(ost_action) \ No newline at end of file diff --git a/modules/bindings/pymod/clustalw.py b/modules/bindings/pymod/clustalw.py index e5400d1ac58d9d621ff27fa1b403b7e6421a7f57..29af869f445f1c1353c2673770048125c507a4fc 100644 --- a/modules/bindings/pymod/clustalw.py +++ b/modules/bindings/pymod/clustalw.py @@ -6,7 +6,7 @@ import subprocess def ClustalW(seq1, seq2=None, clustalw=None, keep_files=False, nopgap=False, clustalw_option_string=False): ''' - Runs a clustalw multiple sequence alignment. The results are returned as a + Runs a ClustalW multiple sequence alignment. The results are returned as a :class:`~ost.seq.AlignmentHandle` instance. There are two ways to use this function: @@ -36,17 +36,25 @@ def ClustalW(seq1, seq2=None, clustalw=None, keep_files=False, nopgap=False, parameter (`seq1`). The second parameter (`seq2`) must be :class:`None`. - :param clustalw: path to clustalw executable (used in :func:`~ost.settings.Locate`) + :param clustalw: path to ClustalW executable (used in :func:`~ost.settings.Locate`) :type clustalw: :class:`str` :param nopgap: turn residue-specific gaps off :type nopgap: :class:`bool` - :param clustalw_option_string: additional clustalw flags (see http://toolkit.tuebingen.mpg.de/clustalw/help_params) + :param clustalw_option_string: additional ClustalW flags (see http://www.clustal.org/download/clustalw_help.txt) :type clustalw_option_string: :class:`str` :param keep_files: do not delete temporary files :type keep_files: :class:`bool` - Note: ClustalW will convert lowercase to uppercase, and change all '.' to '-'. - OST will convert and '?' to 'X' before aligning sequences with Clustalw. + .. note :: + + - In the passed sequences ClustalW will convert lowercase to uppercase, and + change all '.' to '-'. OST will convert and '?' to 'X' before aligning + sequences with ClustalW. + - If a :attr:`sequence name <ost.seq.SequenceHandle.name>` contains spaces, + only the part before the space is considered as sequence name. To avoid + surprises, you should remove spaces from the sequence name. + - Sequence names must be unique (:class:`ValueError` exception raised + otherwise). ClustalW will accept only IUB/IUPAC amino acid and nucleic acid codes: @@ -94,7 +102,8 @@ def ClustalW(seq1, seq2=None, clustalw=None, keep_files=False, nopgap=False, sequence_names = set() for s in seq_list: - sequence_names.add(s.GetName()) + # we cut out anything after a space to be consistent with ClustalW behaviour + sequence_names.add(s.GetName().split(' ')[0]) if len(sequence_names) < len(seq_list): raise ValueError("ClustalW can only process sequences with unique identifiers!") diff --git a/modules/bindings/tests/test_clustalw.py b/modules/bindings/tests/test_clustalw.py index f5625ce9bc4636aa7e98071bad5021c3d22dc256..8f295088ed235ab3ad401eff4525b985df51280d 100644 --- a/modules/bindings/tests/test_clustalw.py +++ b/modules/bindings/tests/test_clustalw.py @@ -63,11 +63,13 @@ class TestClustalWBindings(unittest.TestCase): "Pairwise alignment with modified gap penalties differs from precomputed one" def testUniqueIdentifier(self): - seq1 = seq.CreateSequence('heelloo','AWESOME') - seq2 = seq.CreateSequence('heelloo','AWESOME') - - self.assertRaises(ValueError,clustalw.ClustalW,seq1,seq2) - + # common case + seq1 = seq.CreateSequence('heelloo', 'AWESOME') + seq2 = seq.CreateSequence('heelloo', 'AWESOME') + self.assertRaises(ValueError, clustalw.ClustalW, seq1, seq2) + # nasty case with spaces + seq2 = seq.CreateSequence('heelloo dear', 'AWESOME') + self.assertRaises(ValueError, clustalw.ClustalW, seq1, seq2) if __name__ == "__main__": # test if clustalw package is available on system, otherwise ignore tests diff --git a/modules/doc/actions.rst b/modules/doc/actions.rst new file mode 100644 index 0000000000000000000000000000000000000000..779a95ea47b393a7ede6dd395f1f6d650fda576d --- /dev/null +++ b/modules/doc/actions.rst @@ -0,0 +1,376 @@ +.. ost-actions: + +OST Actions +================================================================================ + +A pure command line interface of OST is provided by actions. +You can execute ``ost -h`` for a list of possible actions and for every action, +you can type ``ost <ACTION> -h`` to get a description on its usage. + +Here we list the most prominent actions with simple examples. + +.. ost-compare-structures: + +Comparing two structures +-------------------------------------------------------------------------------- + +You can compare two structures in terms of quaternary structure score and +lDDT scores between two complexes from the command line with: + +.. code-block:: console + + $ ost compare-structures [-h] [-v VERBOSITY] -m MODEL -r REFERENCE + [-o OUTPUT] [-d] [-ds DUMP_SUFFIX] + [-rs REFERENCE_SELECTION] [-ms MODEL_SELECTION] + [-ca] [-ft] [-cl COMPOUND_LIBRARY] [-qs] + [-c CHAIN_MAPPING [CHAIN_MAPPING ...]] [-rna] + [-l] [-ir INCLUSION_RADIUS] + [-ss SEQUENCE_SEPARATION] [-spr] [-ml] + [-rm REMOVE [REMOVE ...]] [-ce] [-mn] [-sc] + [-p PARAMETER_FILE] [-bt BOND_TOLERANCE] + [-at ANGLE_TOLERANCE] [-cc] + +By default the verbosity is set to 3 which will result in the informations +being shown in the console. The result can be (optionally) saved as JSON file +which is the preferred way of parsing it as the log output might change in the +future. Optionally, the local scores for lDDT can also be dumped to the output +file. Additionally, cleaned up structures can be saved to the disk. +The output file has following format: + +.. code-block:: none + + { + "result": { + "<MODEL NAME>": { # Model name extracted from the file name + "<REFERENCE NAME>": { # Reference name extracted from the file name + "info": { + "residue_names_consistent": <Are the residue numbers consistent? true or false>, + "mapping": <Mapping of chains eg. {"A": "B", "B": "A"}>, + "alignments": <list of chain-chain alignments in FASTA format> + } + }, + "lddt": { # calculated when --lddt (-l) option is selected + "oligo_lddt": { + "status": <SUCCESS or FAILURE>, + "error": <ERROR message if any>, + "global_score": <calculated oligomeric lddt score>, + }, + "weighted_lddt": { + "status": <SUCCESS or FAILURE>, + "error": <ERROR message if any>, + "global_score": <calculated weighted lddt score>, + }, + "single_chain_lddt": [ # a list of chain-chain lDDts + { + "status": <SUCCESS or FAILURE>, + "error": <ERROR message if any>, + "reference_chain": <name of the chain in reference>, + "model_chain": <name of the chain in model> + "global_score": <calculated single-chain lddt score>, + "conserved_contacts": <number of conserved contacts between model and reference>, + "total_contacts": <total number of contacts between model and reference>, + "per_residue_scores": [ # per-residue lDDT scores - calculated when --save-per-residue-scores (-spr) option is selected + { + "total_contacts": <total number of contacts between model and reference>, + "residue_name": <three letter code of the residue>, + "lddt": <residue lDDT score>, + "conserved_contacts": <number of conserved contacts between model and reference for given residue>, + "residue_number": <total number of contacts between model and reference for given residue> + }, + . + . + . + ] + } + ] + }, + "qs_score": { # calculated when --qs-score (-q) option is selected + "status": <SUCCESS or FAILURE>, + "error": <ERROR message if any>, + "global_score": <Global QS-score>, + "best_score": <Best QS-score>, + } + + } + } + }, + "options": {} # Options used to run the script + } + +The "result" filed is a dictionary mapping from model to reference as eg. in +mmCIF file there can be many entities and the script will compare all +combinations. + +Example usage: + +.. code-block:: console + + $ curl https://www.cameo3d.org/static/data/modeling/2018.03.03/5X7J_B/bu_target_01.pdb > reference.pdb + $ curl https://www.cameo3d.org/static/data/modeling/2018.03.03/5X7J_B/servers/server11/oligo_model-1/superposed_oligo_model-1.pdb > model.pdb + $ $OST_ROOT/bin/ost compare-structures --model model.pdb --reference reference.pdb --output output.json --qs-score --residue-number-alignment --lddt --structural-checks --consistency-checks --inclusion-radius 15.0 --bond-tolerance 15.0 --angle-tolerance 15.0 --molck --remove oxt hyd unk --clean-element-column --map-nonstandard-residues + + ################################################################################ + Reading input files (fault_tolerant=False) + --> reading model from model.pdb + imported 2 chains, 396 residues, 3106 atoms; with 0 helices and 0 strands + --> reading reference from reference.pdb + imported 3 chains, 408 residues, 3011 atoms; with 0 helices and 0 strands + ################################################################################ + Cleaning up input with Molck + removing hydrogen atoms + --> removed 0 hydrogen atoms + removing OXT atoms + --> removed 0 OXT atoms + residue A.GLN54 is missing 4 atoms: 'CG', 'CD', 'OE1', 'NE2' + residue A.GLU55 is missing 4 atoms: 'CG', 'CD', 'OE1', 'OE2' + residue A.ARG139 is missing 6 atoms: 'CG', 'CD', 'NE', 'CZ', 'NH1', 'NH2' + residue B.THR53 is missing 1 atom: 'CG2' + residue B.GLN54 is missing 4 atoms: 'CG', 'CD', 'OE1', 'NE2' + residue B.GLU55 is missing 4 atoms: 'CG', 'CD', 'OE1', 'OE2' + residue B.GLU61 is missing 1 atom: 'OE2' + residue B.GLU117 is missing 1 atom: 'O' + residue B.ARG120 is missing 2 atoms: 'NH1', 'NH2' + residue B.ARG142 is missing 2 atoms: 'NH1', 'NH2' + residue B.GLU148 is missing 4 atoms: 'CG', 'CD', 'OE1', 'OE2' + residue B.PRO198 is missing 1 atom: 'O' + _.CL1 is not a standard amino acid + _.CL2 is not a standard amino acid + _.CL3 is not a standard amino acid + _.CL4 is not a standard amino acid + _.CA5 is not a standard amino acid + _.CA6 is not a standard amino acid + _.CA7 is not a standard amino acid + _.CA8 is not a standard amino acid + _.CA9 is not a standard amino acid + _.CL10 is not a standard amino acid + _.CL11 is not a standard amino acid + _.CL12 is not a standard amino acid + _.CL13 is not a standard amino acid + _.CL14 is not a standard amino acid + _.CL15 is not a standard amino acid + _.CA16 is not a standard amino acid + _.CA17 is not a standard amino acid + _.CA18 is not a standard amino acid + _.CA19 is not a standard amino acid + _.CA20 is not a standard amino acid + _.EDO21 is not a standard amino acid + _.EDO22 is not a standard amino acid + _.EDO23 is not a standard amino acid + _.EDO24 is not a standard amino acid + removing hydrogen atoms + --> removed 0 hydrogen atoms + removing OXT atoms + --> removed 0 OXT atoms + ################################################################################ + Performing structural checks + --> for reference(s) + Checking + Checking stereo-chemistry + Average Z-Score for bond lengths: -nan + Bonds outside of tolerance range: 0 out of 0 + Bond Avg Length Avg zscore Num Bonds + Average Z-Score angle widths: 0.00000 + Angles outside of tolerance range: 0 out of 1 + Filtering non-bonded clashes + 0 non-bonded short-range distances shorter than tolerance distance + Distances shorter than tolerance are on average shorter by: 0.00000 + --> for model(s) + Checking + Checking stereo-chemistry + Average Z-Score for bond lengths: -nan + Bonds outside of tolerance range: 0 out of 0 + Bond Avg Length Avg zscore Num Bonds + Average Z-Score angle widths: 0.00000 + Angles outside of tolerance range: 0 out of 1 + Filtering non-bonded clashes + 0 non-bonded short-range distances shorter than tolerance distance + Distances shorter than tolerance are on average shorter by: 0.00000 + ################################################################################ + Comparing to + Chains removed from : _ + Chains in : AB + Chains in : AB + Chemically equivalent chain-groups in pdb_1: [['B', 'A']] + Chemically equivalent chain-groups in pdb_2: [['A', 'B']] + Chemical chain-groups mapping: {('B', 'A'): ('A', 'B')} + Identifying Symmetry Groups... + Symmetry threshold 0.1 used for angles of pdb_1 + Symmetry threshold 0.1 used for axis of pdb_1 + Symmetry threshold 0.1 used for angles of pdb_2 + Symmetry threshold 0.1 used for axis of pdb_2 + Selecting Symmetry Groups... + Symmetry-groups used in pdb_1: [('B',), ('A',)] + Symmetry-groups used in pdb_2: [('A',), ('B',)] + Closed Symmetry with strict parameters + Mapping found: {'A': 'B', 'B': 'A'} + -------------------------------------------------------------------------------- + Checking consistency between and + Consistency check: OK + -------------------------------------------------------------------------------- + Computing QS-score + QSscore pdb_1, pdb_2: best: 0.90, global: 0.90 + -------------------------------------------------------------------------------- + Computing lDDT scores + lDDT settings: + Inclusion Radius: 15 + Sequence separation: 0 + Cutoffs: 0.5, 1, 2, 4 + Residue properties label: lddt + === + --> Computing lDDT between model chain B and reference chain A + Coverage: 1 (187 out of 187 residues) + Global LDDT score: 0.8257 + (877834 conserved distances out of 1063080 checked, over 4 thresholds) + --> Computing lDDT between model chain A and reference chain B + Coverage: 1 (197 out of 197 residues) + Global LDDT score: 0.7854 + (904568 conserved distances out of 1151664 checked, over 4 thresholds) + --> Computing oligomeric lDDT score + Reference pdb_1 has: 2 chains + Model pdb_2 has: 2 chains + Coverage: 1 (384 out of 384 residues) + Oligo lDDT score: 0.8025 + --> Computing weighted lDDT score + Weighted lDDT score: 0.8048 + ################################################################################ + Saving output into output.json + + +This reads the model and reference file and calculates QS-score between them. +In the example above the output file looks as follows: + +.. code-block:: python + + { + "result": { + "": { + "": { + "info": { + "residue_names_consistent": true, + "mapping": { + "chain_mapping": { + "A": "B", + "B": "A" + }, + "alignments": [ + ">reference:A\n-PGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSL---QELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVR-------LGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP\n>model:B\nMPGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSLLLTQELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVRRPDRLEGLGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP", + ">reference:B\n-PGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSLLLTQELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVRRPDRLEGLGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP\n>model:A\nMPGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSLLLTQELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVRRPDRLEGLGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP" + ] + } + }, + "lddt": { + "oligo_lddt": { + "status": "SUCCESS", + "global_score": 0.8025223016738892, + "error": "" + }, + "weighted_lddt": { + "status": "SUCCESS", + "global_score": 0.804789180710712, + "error": "" + }, + "single_chain_lddt": [ + { + "status": "SUCCESS", + "global_score": 0.8257459402084351, + "conserved_contacts": 877834, + "reference_chain": "A", + "total_contacts": 1063080, + "error": "", + "model_chain": "B" + }, + { + "status": "SUCCESS", + "global_score": 0.7854443788528442, + "conserved_contacts": 904568, + "reference_chain": "B", + "total_contacts": 1151664, + "error": "", + "model_chain": "A" + } + ] + }, + "qs_score": { + "status": "SUCCESS", + "global_score": 0.8974384796108209, + "best_score": 0.9022811630070536, + "error": "" + } + } + } + }, + "options": { + "reference": "reference.pdb", + "structural_checks": true, + "chain_mapping": null, + "bond_tolerance": 15.0, + "parameter_file": "Path to stage/share/openstructure/stereo_chemical_props.txt", + "consistency_checks": true, + "qs_score": true, + "map_nonstandard_residues": true, + "save_per_residue_scores": false, + "fault_tolerant": false, + "reference_selection": "", + "cwd": "CWD", + "inclusion_radius": 15.0, + "angle_tolerance": 15.0, + "c_alpha_only": false, + "clean_element_column": true, + "dump_suffix": ".compare.structures.pdb", + "compound_library": "Path to stage/share/openstructure/compounds.chemlib", + "dump_structures": false, + "residue_number_alignment": true, + "verbosity": 3, + "remove": [ + "oxt", + "hyd", + "unk" + ], + "molck": true, + "sequence_separation": 0, + "output": "output.json", + "model": "model.pdb", + "lddt": true, + "model_selection": "" + } + } + +If only all the structures are clean one can omit all the checking steps and +calculate eg. QS-score directly: + +.. code:: console + + $OST_ROOT/bin/ost compare-structures --model model.pdb --reference reference.pdb --output output_qs.json --qs-score --residue-number-alignment + ################################################################################ + Reading input files (fault_tolerant=False) + --> reading model from model.pdb + imported 2 chains, 396 residues, 3106 atoms; with 0 helices and 0 strands + --> reading reference from reference.pdb + imported 3 chains, 408 residues, 3011 atoms; with 0 helices and 0 strands + ################################################################################ + Comparing model.pdb to reference.pdb + Chains removed from reference.pdb: _ + Chains in reference.pdb: AB + Chains in model.pdb: AB + Chemically equivalent chain-groups in reference.pdb: [['B', 'A']] + Chemically equivalent chain-groups in model.pdb: [['A', 'B']] + Chemical chain-groups mapping: {('B', 'A'): ('A', 'B')} + Identifying Symmetry Groups... + Symmetry threshold 0.1 used for angles of reference.pdb + Symmetry threshold 0.1 used for axis of reference.pdb + Symmetry threshold 0.1 used for angles of model.pdb + Symmetry threshold 0.1 used for axis of model.pdb + Selecting Symmetry Groups... + Symmetry-groups used in reference.pdb: [('B',), ('A',)] + Symmetry-groups used in model.pdb: [('A',), ('B',)] + Closed Symmetry with strict parameters + Mapping found: {'A': 'B', 'B': 'A'} + -------------------------------------------------------------------------------- + Checking consistency between model.pdb and reference.pdb + Consistency check: OK + -------------------------------------------------------------------------------- + Computing QS-score + QSscore reference.pdb, model.pdb: best: 0.90, global: 0.90 + ################################################################################ + Saving output into output_qs.json + diff --git a/modules/doc/contributing.rst b/modules/doc/contributing.rst index c87c2daa0dc39444490415d69b489f9c1d5dbdcb..c171b7c0ddf1304876a2a78e60d063ff6fc63cb2 100644 --- a/modules/doc/contributing.rst +++ b/modules/doc/contributing.rst @@ -109,3 +109,74 @@ If you got a patch from someone else and would like to use apply it to your repo .. code-block:: bash git am < changeset.diff + +Starting Your Own Action +-------------------------------------------------------------------------------- +In OST we call scripts/ programs 'actions'. They are started by a +launcher found in your staging directory at :file:`stage/bin/ost`. This little +guy helps keeping the shell environment in the right mood to carry out your +job. So usually you will start an action by + +.. code-block:: console + + $ stage/bin/ost --help + +Starting new action **do** go for a dedicated branch for action-development. +There you can produce intermediate commits while other branches stay clean in +case you have to do some work there which needs to get public. + +After preparing your repository its time to create a file for the action. That +is a bit different than for modules. Assuming we are sitting in the +repository's root: + +.. code-block:: console + + $ touch action/ost-awesome-action + $ chmod +x action/ost-awesome-action + +Two things are important here: actions are prefixed with :file:`ost-`, so they +are recognised by the :file:`ost` launcher. Secondly, action files need to be +executable, which does not propagate if you do it **after** the first call to +``make``. + +To get the new action recognised by ``make`` to be placed in +:file:`stage/libexec/openstructure`, it has to be registered with ``cmake`` in +:file:`actions/CMakeLists.txt`: + +.. code-block:: console + :linenos: + + add_custom_target(actions ALL) + + ost_action_init() + ost_action(ost-awesome-action actions) + +Just add your action with its full filename with a call to `ost_action` at +the end of the file. + +Now its time to fill your action with code. Instead of reading a lot more of +explanations, it should be easy to go by examples from the :file:`actions` +directory. There are only two really important points: + +* No shebang line (``#! /usr/bin/python``) in your action! Also no + ``#! /usr/bin/env python`` or anything like this. This may lead to funny side + effects, like calling a ``python`` interpreter from outside a virtual + environment or calling a different version. Basically it may mess up the + environment your action is running in. Actions are called by :file:`ost`, + that's enough to get everything just right. + +* The action of your action happens in the ost branch of the script. + Your action will have own function definitions, variables and all the bells + and whistles. Hiding behind ost keeps everything separated and makes + things easier when it gets to debugging. So just after + + .. code-block:: python + + import alot + + def functions_specific_to_your_action(...): + + if __name__ == "__main__": + <put together what your action should do here> + + start putting your action together. diff --git a/modules/gfx/src/vertex_array.cc b/modules/gfx/src/vertex_array.cc index 520e0d0e9c95f640c3578b99af95090e7f4aabec..847e38d14a0bcc1163ffcb383f9d7ce179a65353 100644 --- a/modules/gfx/src/vertex_array.cc +++ b/modules/gfx/src/vertex_array.cc @@ -1362,7 +1362,7 @@ void IndexedVertexArray::draw_aalines() {e3[0],e3[1],e3[2]}, {ve0.c[0],ve0.c[1],ve0.c[2],ve0.c[3]}, {ve1.c[0],ve1.c[1],ve1.c[2],ve1.c[3]}, - -0.5*(q0[2]+q1[2])}; + -0.5f*(q0[2]+q1[2])}; line_list.push_back(le); } diff --git a/modules/index.rst b/modules/index.rst index 2e8c37b274c8c40308f7aedb259fd51e1dd8888e..9ab3fdd1ddeaa810a0cfddeaa3c28083c3f22a9c 100644 --- a/modules/index.rst +++ b/modules/index.rst @@ -31,6 +31,7 @@ OpenStructure documentation table mol/alg/lddt mol/alg/molck + actions For Starters -------------------------------------------------------------------------------- @@ -106,6 +107,8 @@ Varia **Molck**: :doc:`Molecular Checker<mol/alg/molck>` +**Actions**: :doc:`OST Actions<actions>` + Extending OpenStructure -------------------------------------------------------------------------------- diff --git a/modules/io/doc/io.rst b/modules/io/doc/io.rst index aa3e15991ba46ea68d9ffeb68c95d6a3aed6a1c5..580c6650a8ead19afc4d372bc65b106fdb7027f2 100644 --- a/modules/io/doc/io.rst +++ b/modules/io/doc/io.rst @@ -351,37 +351,23 @@ before computing the lDDT scores it is required to pass parameter file based on Engh and Huber parameters, and on the atomic radii as defined in the Cambridge Structural Database. OpenStructure ships with default file called `stereo_chemical_props.txt` located in `$OST_ROOT/share/openstructure` -directory. A class :class:`~ost.io.StereoChemicalParamsReader` is used to read -this file. +directory. A function :func:`~ost.io.ReadStereoChemicalPropsFile` is used to +read this file. -.. class:: StereoChemicalParamsReader(filename="") - Object that holds and reads stereochemical parameters. - :param filename: Sets :attr:`filename`. +.. function:: ReadStereoChemicalPropsFile(filename="", check=True) - .. attribute:: bond_table + Read stereochemical parameters - if not provided a local version will be used. - The table containing bond information of type :class:`~ost.mol.alg.StereoChemicalParams`. + :param filename: The path to the parameter file that will be used. If set + to "", it reads the default file shipped with OpenStructure. + :type filename: :class:`str` + :param check: Raise an error when any of the resulting tables are empty. + :type check: :class:`bool` + :return: Object containing stereochemical parameters + :rtype: :class:`~ost.mol.alg.StereoChemicalProps` - .. attribute:: angle_table +.. function:: GetStereoChemicalPropsFile() - The table containing angle information of type :class:`~ost.mol.alg.StereoChemicalParams`. - - .. attribute:: nonbonded_table - - The table containing clashes of type :class:`~ost.mol.alg.ClashingDistances`. - - .. attribute:: filename - - The path to the parameter file that will be used. If set to "", it reads the - default file shipped with OpenStructure. - - :type: :class:`str` - - .. method:: Read(check=False) - - Read the file. - - :param check: Raise an error when any of the resulting tables are empty. - :type check: :class:`bool` + Get the default path to the stereochemical paramteres file. diff --git a/modules/io/pymod/wrap_io.cc b/modules/io/pymod/wrap_io.cc index 8704a4657d5a7f152f96d339164976fe5bbeb0ce..a5abde8cc5966a3ddd7bc21b41416117f663641f 100644 --- a/modules/io/pymod/wrap_io.cc +++ b/modules/io/pymod/wrap_io.cc @@ -68,6 +68,9 @@ BOOST_PYTHON_FUNCTION_OVERLOADS(save_entity_handle_ov, BOOST_PYTHON_FUNCTION_OVERLOADS(save_entity_view_ov, save_ent_view, 2, 3) +ost::mol::alg::StereoChemicalProps (*read_props_a)(String filename, bool check) = &ReadStereoChemicalPropsFile; +ost::mol::alg::StereoChemicalProps (*read_props_b)(bool check) = &ReadStereoChemicalPropsFile; + } void export_pdb_io(); @@ -121,14 +124,11 @@ BOOST_PYTHON_MODULE(_ost_io) def("LoadMAE", &LoadMAE); def("LoadPQR", &LoadPQR); - class_<io::StereoChemicalParamsReader>("StereoChemicalParamsReader", - init<String>(arg("filename"))) - .def(init<>()) - .def("Read", &io::StereoChemicalParamsReader::Read, (arg("check")=false)) - .def_readwrite("filename", &io::StereoChemicalParamsReader::filename) - .def_readwrite("bond_table", &io::StereoChemicalParamsReader::bond_table) - .def_readwrite("angle_table", &io::StereoChemicalParamsReader::angle_table) - .def_readwrite("nonbonded_table", &io::StereoChemicalParamsReader::nonbonded_table); + def("ReadStereoChemicalPropsFile", read_props_a, + (arg("filename"), arg("check")=true)); + def("ReadStereoChemicalPropsFile", read_props_b, + (arg("check")=true)); + def("GetStereoChemicalPropsFile", &GetStereoChemicalPropsFile); export_pdb_io(); export_mmcif_io(); diff --git a/modules/io/src/mol/stereochemical_params_reader.cc b/modules/io/src/mol/stereochemical_params_reader.cc index f0317a3ea19ae8dd78b675a4c08576555e5e659a..0201caf573168a860ae10190cef51ec526238ed2 100644 --- a/modules/io/src/mol/stereochemical_params_reader.cc +++ b/modules/io/src/mol/stereochemical_params_reader.cc @@ -18,47 +18,50 @@ //------------------------------------------------------------------------------ #include <boost/filesystem/fstream.hpp> #include <ost/platform.hh> -#include <ost/mol/alg/filter_clashes.hh> #include <ost/io/stereochemical_params_reader.hh> -namespace { - std::vector<String> ReadStereoChemicalFile(const String& filename){ - boost::filesystem::path loc(filename); - boost::filesystem::ifstream infile(loc); - if (!infile) { - std::stringstream serr; - serr << "Could not find " << filename; - throw ost::Error(serr.str()); - } - std::vector<String> stereo_chemical_props; - String line; - while (std::getline(infile, line)) - { - std::stringstream line_stream(line); - stereo_chemical_props.push_back(line); - } - - return stereo_chemical_props; - } -} - namespace ost { namespace io { -StereoChemicalParamsReader::StereoChemicalParamsReader() { - filename = ost::GetSharedDataPath() + "/stereo_chemical_props.txt"; -} +ost::mol::alg::StereoChemicalProps ReadStereoChemicalPropsFile(String filename, bool check){ + boost::filesystem::path loc(filename); + boost::filesystem::ifstream infile(loc); + if (!infile) { + std::stringstream serr; + serr << "Could not find parameter file '" << filename << "'"; + throw ost::Error(serr.str()); + } + std::vector<String> stereo_chemical_props; + String line; + while (std::getline(infile, line)) + { + std::stringstream line_stream(line); + stereo_chemical_props.push_back(line); + } -StereoChemicalParamsReader::StereoChemicalParamsReader(const String& init_filename): filename(init_filename) {} + ost::mol::alg::StereoChemicalParams bond_table; + ost::mol::alg::StereoChemicalParams angle_table; + ost::mol::alg::ClashingDistances nonbonded_table; -void StereoChemicalParamsReader::Read(bool check) { - std::vector<String> stereo_chemical_props = ReadStereoChemicalFile(filename); - // Bonds bond_table = ost::mol::alg::FillStereoChemicalParams("Bond", stereo_chemical_props, check); // Angles angle_table = ost::mol::alg::FillStereoChemicalParams("Angle", stereo_chemical_props, check); // Not bonded nonbonded_table = ost::mol::alg::FillClashingDistances(stereo_chemical_props, check); + + return ost::mol::alg::StereoChemicalProps(bond_table, angle_table, nonbonded_table); +} + +ost::mol::alg::StereoChemicalProps ReadStereoChemicalPropsFile(bool check) { + String filename = GetStereoChemicalPropsFile(); + return ReadStereoChemicalPropsFile(filename, check); } + +String GetStereoChemicalPropsFile() { + String filename; + filename = ost::GetSharedDataPath() + "/stereo_chemical_props.txt"; + return filename; +} + }} // ns diff --git a/modules/io/src/mol/stereochemical_params_reader.hh b/modules/io/src/mol/stereochemical_params_reader.hh index f8990f55268de150d6f17a28160f53e695781bfa..a5d33acce7888bdcd88b74ec0aecd38a120eb9d6 100644 --- a/modules/io/src/mol/stereochemical_params_reader.hh +++ b/modules/io/src/mol/stereochemical_params_reader.hh @@ -20,20 +20,13 @@ #define OST_IO_STEREOCHEMICAL_PARAMS_READER_H #include <ost/io/module_config.hh> -#include <ost/mol/alg/filter_clashes.hh> +#include <ost/mol/alg/local_dist_diff_test.hh> namespace ost { namespace io { -struct StereoChemicalParamsReader { - String filename; - ost::mol::alg::StereoChemicalParams bond_table; - ost::mol::alg::StereoChemicalParams angle_table; - ost::mol::alg::ClashingDistances nonbonded_table; - - StereoChemicalParamsReader(); - StereoChemicalParamsReader(const String& filename); - void Read(bool check=false); -}; +ost::mol::alg::StereoChemicalProps ReadStereoChemicalPropsFile(String filename, bool check=false); +ost::mol::alg::StereoChemicalProps ReadStereoChemicalPropsFile(bool check=false); +String GetStereoChemicalPropsFile(); }} // ns diff --git a/modules/mol/alg/doc/lddt.rst b/modules/mol/alg/doc/lddt.rst index 6529c47758e29d8fa292c475d7a212ce25cc9e56..3201243bdaa59551cc73e543e16100c161bfb5f4 100644 --- a/modules/mol/alg/doc/lddt.rst +++ b/modules/mol/alg/doc/lddt.rst @@ -136,15 +136,13 @@ for bonds and angles respectively. For steric clashes, the lddt executable recovers atomic radii and clashing tolerance distances from the parameter file, depending on the atomic element under -investigation. When an atomic element cannot be determined, the lddt executable -uses a default atomic radius of 1.5 Angstrom. This value can be overriden using -the -m value, passing a new radius (in Ansgstroms) to the program. +investigation. For example: .. code-block:: bash - lddt -f -p stereo_chemical_params.txt -b 8 -a 8 -m 1.0 mdl1.pdb ref.pdb + lddt -f -p stereo_chemical_params.txt -b 8 -a 8 mdl1.pdb ref.pdb ----------------------------- @@ -198,11 +196,16 @@ One can replicate the binary using simple python script: CheckStructure, LocalDistDiffTest, GetlDDTPerResidueStats, - PrintlDDTPerResidueStats) - from ost.io import StereoChemicalParamsReader + PrintlDDTPerResidueStats, + ResidueNamesMatch) + from ost.io import ReadStereoChemicalPropsFile model_path = "Path to your model pdb file" reference_path = "Path to your reference pdb file" + structural_checks = True + bond_tolerance = 12 + angle_tolerance = 12 + cutoffs = [0.5, 1.0, 2.0, 4.0] # # Load model and prepare its view model = LoadPDB(model_path) @@ -220,29 +223,44 @@ One can replicate the binary using simple python script: CleanlDDTReferences(references) # # Prepare residue map from references - rdmap = PreparelDDTGlobalRDMap(references, settings) + rdmap = PreparelDDTGlobalRDMap(references, + cutoffs=cutoffs, + sequence_separation=settings.sequence_separation, + radius=settings.radius) # # This part is optional and it depends on our settings parameter - if settings.structural_checks: - stereochemical_parameters = StereoChemicalParamsReader( - settings.parameter_file_path) - stereochemical_parameters.Read() + if structural_checks: + stereochemical_parameters = ReadStereoChemicalPropsFile() CheckStructure(ent=model_view, bond_table=stereochemical_parameters.bond_table, angle_table=stereochemical_parameters.angle_table, nonbonded_table=stereochemical_parameters.nonbonded_table, - bond_tolerance=settings.bond_tolerance, - angle_tolerance=settings.angle_tolerance) + bond_tolerance=bond_tolerance, + angle_tolerance=angle_tolerance) + # + # Check consistency + is_cons = ResidueNamesMatch(model_view, references[0], True) + print "Consistency check: ", "OK" if is_cons else "ERROR" # # Calculate lDDT - LocalDistDiffTest(model_view, references, rdmap, settings) + LocalDistDiffTest(model_view, + references, + rdmap, + settings) # # Get the local scores - local_scores = GetlDDTPerResidueStats(model, rdmap, settings) + local_scores = GetlDDTPerResidueStats(model_view, + rdmap, + structural_checks, + settings.label) # # Pring local scores - PrintlDDTPerResidueStats(local_scores, settings) + PrintlDDTPerResidueStats(local_scores, structural_checks, len(cutoffs)) + +Similar effect could be obtained using lDDTScorer. See :class:`~ost.mol.alg.lDDTScorer` +for a simple example. + -This can be useful when we already have an models and references already read -in the memory and we do not want run the binary. +The Python API can be useful when we already have an models and references already +read in the memory and we do not want run the binary. Please refere to specific function documentation for more details. diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst index d4b71376c848fc14e9e0dcb1abecb7421e8debc1..81f3e77643c0eae354f789288741fd2d43de2b94 100644 --- a/modules/mol/alg/doc/molalg.rst +++ b/modules/mol/alg/doc/molalg.rst @@ -306,7 +306,7 @@ Local Distance Test scores (lDDT, DRMSD) :returns: :class:`~ost.mol.alg.GlobalRDMap` -.. function:: PreparelDDTGlobalRDMap(reference_list, settings) +.. function:: PreparelDDTGlobalRDMap(reference_list, cutoff_list, sequence_separation, max_dist) A wrapper around :func:`CreateDistanceList` and :func:`CreateDistanceListFromMultipleReferences`. Depending on the length of @@ -315,9 +315,12 @@ Local Distance Test scores (lDDT, DRMSD) :param reference_list: a list of reference structures from which distances are derived :type reference_list: list of :class:`~ost.mol.EntityView` - :param settings: lDDT settings - :type settings: :class:`~ost.mol.alg.lDDTSettings` - + :param max_dist: the inclusion radius in Angstroms (to determine which + distances are checked for conservation) + :type max_dist: :class:`float` + :param sequence_separation: sequence separation parameter ie. maximum distance + between two sequences. + :type sequence_separation: :class:`int` :returns: :class:`~ost.mol.alg.GlobalRDMap` @@ -356,12 +359,12 @@ Local Distance Test scores (lDDT, DRMSD) :class:`~ost.io.StereoChemicalParamsReader` or :func:`FillClashingDistances` :type nonbonded_table: :class:`~ost.mol.alg.ClashingDistances` :param bond_tolerance: Tolerance in stddev for bonds - :type bond_tolerance: float + :type bond_tolerance: :class:`float` :param angle_tolerance: Tolerance in stddev for angles - :type angle_tolerance: float + :type angle_tolerance: :class:`float` -.. function:: GetlDDTPerResidueStats(model, distance_list, settings) +.. function:: GetlDDTPerResidueStats(model, distance_list, structural_checks, label) Get the per-residue statistics from the lDDT calculation. @@ -369,20 +372,25 @@ Local Distance Test scores (lDDT, DRMSD) :type model: :class:`~ost.mol.EntityHandle` :param distance_list: The list of distances to check for conservation :type distance_list: :class:`~ost.mol.alg.GlobalRDMap` - :param settings: lDDT settings - :type settings: :class:`~ost.mol.alg.lDDTSettings` + :param structural_checks: Were structural checks performed on the model? + :type structural_checks: :class:`bool` + :param label: Label used for ResidueHandle properties that store the local + scores. + :type label: :class:`str` :returns: Per-residue local lDDT scores :rtype: :class:`list` of :class:`~ost.mol.alg.lDDTLocalScore` -.. function:: PrintlDDTPerResidueStats(scores, settings) +.. function:: PrintlDDTPerResidueStats(scores, structural_checks, cutoffs_length) Print per-residue statistics from lDDT calculation. :param scores: Local lDDT scores :type scores: :class:`list` of :class:`~ost.mol.alg.lDDTLocalScore` - :param settings: lDDT settings - :type settings: :class:`~ost.mol.alg.lDDTSettings` + :param structural_checks: Where structural checks performed on the model? + :type structural_checks: :class:`bool` + :param cutoffs_length: Length of the cutoffs list used to calculate lDDT + :type cutoffs_length: :class:`int` .. class:: lDDTLocalScore(cname, rname, rnum, is_assessed, quality_problems, \ @@ -445,7 +453,7 @@ Local Distance Test scores (lDDT, DRMSD) .. attribute:: total_dist - Total number of conserved distances. + Total number of distances. :type: :class:`int` @@ -465,44 +473,47 @@ Local Distance Test scores (lDDT, DRMSD) :type structural_checks: bool :param cutoffs_length: Length of the cutoffs list used for calculations :type cutoffs_length: int + +.. class:: StereoChemicalProps(bond_table, angle_table, nonbonded_table) + + Object containing the stereo-chemical properties read form stereochmical_props.txt + file. + + :param bond_table: Sets :attr:`bond_table` + :param angle_table: Sets :attr:`angle_table` + :param nonbonded_table: Sets :attr:`nonbonded_table` + + .. attribute:: bond_table + + Object containing bond parameters + + :type: :class:`~ost.mol.alg.StereoChemicalParams` + + .. attribute:: angle_table + + Object containing angle parameters + + :type: :class:`~ost.mol.alg.StereoChemicalParams` + + .. attribute:: nonbonded_table + + Object containing clashing distances parameters + + :type: :class:`~ost.mol.alg.ClashingDistances` -.. class:: lDDTSettings(bond_tolerance=12, \ - angle_tolerance=12, \ - radius=15, \ +.. class:: lDDTSettings(radius=15, \ sequence_separation=0, \ - sel="", \ - parameter_file_path="", \ - structural_checks=True, \ - consistency_checks=True, \ cutoffs=(0.5, 1.0, 2.0, 4.0), \ label="locallddt") Object containing the settings used for lDDT calculations. - :param bond_tolerance: Sets :attr:`bond_tolerance`. - :param angle_tolerance: Sets :attr:`angle_tolerance`. :param radius: Sets :attr:`radius`. :param sequence_separation: Sets :attr:`sequence_separation`. - :param sel: Sets :attr:`sel`. - :param parameter_file_path: Sets :attr:`parameter_file_path`. - :param structural_checks: Sets :attr:`structural_checks`. - :param consistency_checks: Sets :attr:`consistency_checks`. :param cutoffs: Sets :attr:`cutoffs`. :param label: Sets :attr:`label`. - .. attribute:: bond_tolerance - - Tolerance in stddevs for bonds. - - :type: :class:`float` - - .. attribute:: angle_tolerance - - Tolerance in stddevs for angles. - - :type: :class:`float` - .. attribute:: radius Distance inclusion radius. @@ -515,32 +526,6 @@ Local Distance Test scores (lDDT, DRMSD) :type: :class:`int` - .. attribute:: sel - - Selection performed on reference(s). - - :type: :class:`str` - - .. attribute:: parameter_file_path - - Path to the stereochemical parameter file. If set to "", it the default file - shipped with OpenStructure is used (see - :class:`~ost.io.StereoChemicalParamsReader`). - - :type: :class:`str` - - .. attribute:: structural_checks - - Are structural checks and filter input data on? - - :type: :class:`bool` - - .. attribute:: consistency_checks - - Are consistency checks on? - - :type: :class:`bool` - .. attribute:: cutoffs List of thresholds used to determine distance conservation. @@ -553,13 +538,6 @@ Local Distance Test scores (lDDT, DRMSD) :type: :class:`str` - .. method:: SetStereoChemicalParamsPath(path) - - Set the path to the stereochemical parameter file. - - :param path: Path to stereochemical parameter file - :type path: str - .. method:: PrintParameters() Print settings. @@ -569,6 +547,102 @@ Local Distance Test scores (lDDT, DRMSD) :return: String representation of the lDDTSettings object. :rtype: :class:`str` +.. class:: lDDTScorer(reference, model, settings) + + Object to compute lDDT scores. + + Example usage. + + .. code:: python + + #! /bin/env python + """Run lDDT from within script.""" + from ost.io import LoadPDB + from ost.mol.alg import (CleanlDDTReferences, + lDDTSettings, lDDTScorer) + from ost.io import ReadStereoChemicalPropsFile + + ent_full = LoadPDB('3ia3', remote=True) + model_view = ent_full.Select('cname=A') + references = [ent_full.Select('cname=C')] + + # + # Initialize settings with default parameters and print them + settings = lDDTSettings() + settings.PrintParameters() + + # Clean up references + CleanlDDTReferences(references) + # + # Calculate lDDT + scorer = lDDTScorer(references=references, model=model_view, settings=settings) + print "Global score:", scorer.global_score + scorer.PrintPerResidueStats() + + :param references: Sets :attr:`references` + :param model: Sets :attr:`model` + :param settings: Sets :attr:`settings` + + .. attribute:: references + + A list of reference structures. + + :type: list(:class:`~ost.mol.EntityView`) + + .. attribute:: model + + A model structure. + + :type: :class:`~ost.mol.EntityView` + + .. attribute:: settings + + Settings used to calculate lDDT. + + :type: :class:`~ost.mol.alg.lDDTSettings` + + .. attribute:: global_dist_list + + Global map of residue properties. + + :type: :class:`~ost.mol.alg.GlobalRDMap` + + .. attribute:: global_score + + Global lDDT score. It is calculated as :attr:`conserved_contacts` divided + by :attr:`total_contacts`. + + :type: float + + .. attribute:: conserved_contacts + + Number of conserved distances. + + :type: int + + .. attribute:: total_contacts + + Number of total distances. + + :type: + + .. attribute:: local_scores + + Local scores. For each of the residue lDDT is it is calculated as residue + conserved contacts divided by residue total contacts. + + :type: list(:class:`~ost.mol.alg.lDDTLocalScore`) + + .. attribute:: is_valid + + Is the calculated score valid? + + :type: bool + + .. method:: PrintPerResidueStats + + Print per-residue statistics. + .. class:: UniqueAtomIdentifier(chain, residue_number, residue_name, atom_name) @@ -1587,8 +1661,10 @@ to standard amino acids. :type src_res: :class:`~ost.mol.ResidueHandle` :param dst_res: The destination residue :type dst_res: :class:`~ost.mol.ResidueHandle` + :param editor: Editor used to modify *dst_res*. + :type editor: :class:`~ost.mol.XCSEditor` - :returns: true if the residue could be copied, false if not. + :returns: True if the residue could be copied, False if not. .. function:: CopyConserved(src_res, dst_res, editor) @@ -1608,8 +1684,10 @@ to standard amino acids. :type src_res: :class:`~ost.mol.ResidueHandle` :param dst_res: The destination residue :type dst_res: :class:`~ost.mol.ResidueHandle` + :param editor: Editor used to modify *dst_res*. + :type editor: :class:`~ost.mol.XCSEditor` - :returns: a tuple of bools stating whether the residue could be copied and + :returns: A tuple of bools stating whether the residue could be copied and whether the Cbeta atom was inserted into the ``dst_res``. .. function:: CopyNonConserved(src_res, dst_res, editor) @@ -1621,8 +1699,10 @@ to standard amino acids. :type src_res: :class:`~ost.mol.ResidueHandle` :param dst_res: The destination residue :type dst_res: :class:`~ost.mol.ResidueHandle` + :param editor: Editor used to modify *dst_res*. + :type editor: :class:`~ost.mol.XCSEditor` - :returns: a tuple of bools stating whether the residue could be copied and + :returns: A tuple of bools stating whether the residue could be copied and whether the Cbeta atom was inserted into the ``dst_res``. @@ -1840,4 +1920,4 @@ API :param ent: Structure to check :type ent: :class:`~ost.mol.EntityHandle` :param lib: Compound library - :type lib: :class:`~ost.conop.CompoundLib` \ No newline at end of file + :type lib: :class:`~ost.conop.CompoundLib` diff --git a/modules/mol/alg/doc/molck.rst b/modules/mol/alg/doc/molck.rst index 5936b0584c5b1c3fb8697bb8151859a0a28b017d..1dfaf6addeb0eca91744505ad34d9efb4a0eb6a2 100644 --- a/modules/mol/alg/doc/molck.rst +++ b/modules/mol/alg/doc/molck.rst @@ -18,7 +18,7 @@ To check one PDB file (struc1.pdb) with Molck, use the following command: molck --complib <PATH TO COMPOUND LIB> struc1.pdb -The checked and cleaned file will be saved by default ad struc1-molck.pdb. +The checked and cleaned file will be saved by default ad struc1-molcked.pdb. Similarly it is possible to check a list of PDB files: diff --git a/modules/mol/alg/pymod/qsscoring.py b/modules/mol/alg/pymod/qsscoring.py index 16635d7025e330345dfc20109cf19fd63ffc4257..5a3477c903960d68d151739f77004914c7307721 100644 --- a/modules/mol/alg/pymod/qsscoring.py +++ b/modules/mol/alg/pymod/qsscoring.py @@ -1,5 +1,6 @@ """ -Scoring of quaternary structures as in Martino's 2017 paper. +Scoring of quaternary structures (QS). The QS scoring is according to the paper +by `Bertoni et al. <https://dx.doi.org/10.1038/s41598-017-09654-8>`_. .. note :: @@ -16,9 +17,11 @@ Scoring of quaternary structures as in Martino's 2017 paper. Authors: Gerardo Tauriello, Martino Bertoni """ -from ost import mol, geom, conop, seq, settings +from ost import mol, geom, conop, seq, settings, PushVerbosityLevel from ost import LogError, LogWarning, LogScript, LogInfo, LogVerbose, LogDebug from ost.bindings.clustalw import ClustalW +from ost.mol.alg import lDDTScorer +from ost.seq.alg.renumber import Renumber import numpy as np from scipy.misc import factorial from scipy.special import binom @@ -89,13 +92,27 @@ class QSscorer: -> weight_extra_mapped = sum(w(d)) for all mapped but non-shared -> weight_extra_all = sum(w(d)) for all non-shared -> w(d) = 1 if d <= 5, exp(-2 * ((d-5.0)/4.28)^2) else - + + In the formulas above: + + * "d": CA/CB-CA/CB distance of an "inter-chain contact" ("d1", "d2" for + "shared" contacts). + * "mapped": we could map chains of two structures and align residues in + :attr:`alignments`. + * "shared": pairs of residues which are "mapped" and have + "inter-chain contact" in both structures. + * "inter-chain contact": CB-CB pairs (CA for GLY) with distance <= 12 A + (fallback to CA-CA if :attr:`calpha_only` is True). + * "w(d)": weighting function (prob. of 2 res. to interact given CB distance) + from `Xu et al. 2009 <https://dx.doi.org/10.1016%2Fj.jmb.2008.06.002>`_. + :param ent_1: First structure to be scored. :type ent_1: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView` :param ent_2: Second structure to be scored. :type ent_2: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView` + :param res_num_alignment: Sets :attr:`res_num_alignment` :raises: :class:`QSscoreError` if input structures are invalid or are monomers or have issues that make it impossible for a QS score to be computed. @@ -125,8 +142,15 @@ class QSscorer: of symmetries and chain mappings. By default it is set to 100. :type: :class:`int` + + .. attribute:: res_num_alignment + + Forces each alignment in :attr:`alignments` to be based on residue numbers + instead of using a global BLOSUM62-based alignment. + + :type: :class:`bool` """ - def __init__(self, ent_1, ent_2): + def __init__(self, ent_1, ent_2, res_num_alignment=False): # generate QSscoreEntity objects? if isinstance(ent_1, QSscoreEntity): self.qs_ent_1 = ent_1 @@ -147,6 +171,7 @@ class QSscorer: self.qs_ent_1.SetName(self.qs_ent_1.original_name) self.qs_ent_2.SetName(self.qs_ent_2.original_name) # set other public attributes + self.res_num_alignment = res_num_alignment self.calpha_only = self.qs_ent_1.calpha_only or self.qs_ent_2.calpha_only self.max_ca_per_chain_for_cm = 100 # init cached stuff @@ -161,9 +186,6 @@ class QSscorer: self._global_score = None self._best_score = None self._superposition = None - self._lddt_score = None - self._lddt_mdl = None - self._lddt_ref = None self._clustalw_bin = None @property @@ -355,8 +377,15 @@ class QSscorer: There will be one alignment for each mapped chain and they are ordered by their chain names in :attr:`qs_ent_1`. - The sequences of the alignments have views attached into - :attr:`QSscoreEntity.ent` of :attr:`qs_ent_1` and :attr:`qs_ent_2`. + The first sequence of each alignment belongs to :attr:`qs_ent_1` and the + second one to :attr:`qs_ent_2`. The sequences are named according to the + mapped chain names and have views attached into :attr:`QSscoreEntity.ent` + of :attr:`qs_ent_1` and :attr:`qs_ent_2`. + + If :attr:`res_num_alignment` is False, each alignment is performed using a + global BLOSUM62-based alignment. Otherwise, the positions in the alignment + sequences are simply given by the residue number so that residues with + matching numbers are aligned. :getter: Computed on first use (cached) :type: :class:`list` of :class:`~ost.seq.AlignmentHandle` @@ -364,7 +393,8 @@ class QSscorer: if self._alignments is None: self._alignments = _GetMappedAlignments(self.qs_ent_1.ent, self.qs_ent_2.ent, - self.chain_mapping) + self.chain_mapping, + self.res_num_alignment) return self._alignments @property @@ -434,65 +464,6 @@ class QSscorer: % (cmp_view.residue_count, cmp_view.chain_count, sup_rmsd)) return self._superposition - @property - def lddt_score(self): - """The multi-chain lDDT score. - - .. note:: - - lDDT is not considering over-prediction (i.e. extra chains) and hence is - not symmetric. Here, we consider :attr:`qs_ent_1` as the reference and - :attr:`qs_ent_2` as the model. The alignments from :attr:`alignments` are - used to map residue numbers and chains. - - The score is computed with OST's :func:`~ost.mol.alg.LocalDistDiffTest` - function with a single distance threshold of 2 A and an inclusion radius of - 8 A. You can use :attr:`lddt_mdl` and :attr:`lddt_ref` to get entities on - which you can call any other lDDT function with any other set of parameters. - - :getter: Computed on first use (cached) - :type: :class:`float` - """ - if self._lddt_score is None: - self._ComputeLDDT() - return self._lddt_score - - @property - def lddt_mdl(self): - """The model entity used for lDDT scoring (:attr:`lddt_score`) and annotated - with local scores. - - Local scores are available as residue properties named 'lddt' and on each - atom as a B-factor. Only CA atoms are considered if :attr:`calpha_only` is - True, otherwise this is an all-atom score. - - Since, the lDDT computation requires a single chain with mapped residue - numbering, all chains are appended into a single chain X with unique residue - numbers according to the column-index in the alignment. The alignments are - in the same order as they appear in :attr:`alignments`. Additional residues - are appended at the end of the chain with unique residue numbers. - - :getter: Computed on first use (cached) - :type: :class:`~ost.mol.EntityHandle` - """ - if self._lddt_mdl is None: - self._ComputeLDDT() - return self._lddt_mdl - - @property - def lddt_ref(self): - """The reference entity used for lDDT scoring (:attr:`lddt_score`). - - This is a single chain X with residue numbers matching ones in - :attr:`lddt_mdl` where aligned and unique numbers for additional residues. - - :getter: Computed on first use (cached) - :type: :class:`~ost.mol.EntityHandle` - """ - if self._lddt_ref is None: - self._ComputeLDDT() - return self._lddt_ref - @property def clustalw_bin(self): """ @@ -506,6 +477,21 @@ class QSscorer: self._clustalw_bin = settings.Locate(('clustalw', 'clustalw2')) return self._clustalw_bin + def GetOligoLDDTScorer(self, settings, penalize_extra_chains=True): + """ + :return: :class:`OligoLDDTScorer` object, setup for this QS scoring problem. + :param settings: Passed to :class:`OligoLDDTScorer` constructor. + :param penalize_extra_chains: Passed to :class:`OligoLDDTScorer` constructor. + """ + if penalize_extra_chains: + return OligoLDDTScorer(self.qs_ent_1.ent, self.qs_ent_2.ent, + self.alignments, self.calpha_only, settings, + True, self.chem_mapping) + else: + return OligoLDDTScorer(self.qs_ent_1.ent, self.qs_ent_2.ent, + self.alignments, self.calpha_only, settings, False) + + ############################################################################## # Class internal helpers (anything that doesnt easily work without this class) ############################################################################## @@ -537,6 +523,8 @@ class QSscorer: def _ComputeScores(self): """Fills cached global_score and best_score.""" + if len(self.chain_mapping) < 2: + raise QSscoreError("QS-score is not defined for monomers") # get contacts if self.calpha_only: contacts_1 = self.qs_ent_1.contacts_ca @@ -554,22 +542,6 @@ class QSscorer: % (self.qs_ent_1.GetName(), self.qs_ent_2.GetName(), self._best_score, self._global_score)) - def _ComputeLDDT(self): - """Fills cached lddt_score, lddt_mdl and lddt_ref.""" - LogInfo('Computing lDDT score') - # check reference and model - ref, mdl = self.qs_ent_1.ent, self.qs_ent_2.ent - LogInfo('Reference %s has: %s chains' % (ref.GetName(), ref.chain_count)) - LogInfo('Model %s has: %s chains' % (mdl.GetName(), mdl.chain_count)) - if mdl.chain_count > ref.chain_count: - LogWarning('MODEL contains more chains than REFERENCE, ' - 'lDDT is not considering them') - # get single chain reference and model - self._lddt_ref, self._lddt_mdl = \ - _MergeAlignedChains(self.alignments, ref, mdl, self.calpha_only) - # score them (mdl and ref changed) and keep results - self._lddt_score = _ComputeLDDTScore(self._lddt_ref, self._lddt_mdl) - ############################################################################### # Entity with cached entries for QS scoring @@ -639,9 +611,8 @@ class QSscoreEntity(object): 'removing water, ligands and small peptides.') self.is_valid = False elif self.ent.chain_count == 1: - LogError('Structure ' + ent.GetName() + ' is a monomer. ' - 'QSscore is not defined for monomers.') - self.is_valid = False + LogWarning('Structure ' + ent.GetName() + ' is a monomer.') + self.is_valid = True else: self.is_valid = True # init cached stuff @@ -898,6 +869,528 @@ def GetContacts(entity, calpha_only, dist_thr=12.0): # DONE return contacts +############################################################################### +# Oligo-lDDT scores +############################################################################### + +class OligoLDDTScorer(object): + """Helper class to calculate oligomeric lDDT scores. + + This class can be used independently, but commonly it will be created by + calling :func:`QSscorer.GetOligoLDDTScorer`. + + .. note:: + + By construction, lDDT scores are not symmetric and hence it matters which + structure is the reference (:attr:`ref`) and which one is the model + (:attr:`mdl`). Extra residues in the model are generally not considered. + Extra chains in both model and reference can be considered by setting the + :attr:`penalize_extra_chains` flag to True. + + :param ref: Sets :attr:`ref` + :param mdl: Sets :attr:`mdl` + :param alignments: Sets :attr:`alignments` + :param calpha_only: Sets :attr:`calpha_only` + :param settings: Sets :attr:`settings` + :param penalize_extra_chains: Sets :attr:`penalize_extra_chains` + :param chem_mapping: Sets :attr:`chem_mapping`. Must be given if + *penalize_extra_chains* is True. + + .. attribute:: ref + mdl + + Full reference/model entity to be scored. The entity must contain all chains + mapped in :attr:`alignments` and may also contain additional ones which are + considered if :attr:`penalize_extra_chains` is True. + + :type: :class:`~ost.mol.EntityHandle` + + .. attribute:: alignments + + One alignment for each mapped chain of :attr:`ref`/:attr:`mdl` as defined in + :attr:`QSscorer.alignments`. The first sequence of each alignment belongs to + :attr:`ref` and the second one to :attr:`mdl`. Sequences must have sequence + naming and attached views as defined in :attr:`QSscorer.alignments`. + + :type: :class:`list` of :class:`~ost.seq.AlignmentHandle` + + .. attribute:: calpha_only + + If True, restricts lDDT score to CA only. + + :type: :class:`bool` + + .. attribute:: settings + + Settings to use for lDDT scoring. + + :type: :class:`~ost.mol.alg.lDDTSettings` + + .. attribute:: penalize_extra_chains + + If True, extra chains in both :attr:`ref` and :attr:`mdl` will penalize the + lDDT scores. + + :type: :class:`bool` + + .. attribute:: chem_mapping + + Inter-complex mapping of chemical groups as defined in + :attr:`QSscorer.chem_mapping`. Used to find "chem-mapped" chains in + :attr:`ref` for unmapped chains in :attr:`mdl` when penalizing scores. + Each unmapped model chain can add extra reference-contacts according to the + average total contacts of each single "chem-mapped" reference chain. If + there is no "chem-mapped" reference chain, a warning is shown and the model + chain is ignored. + + + Only relevant if :attr:`penalize_extra_chains` is True. + + :type: :class:`dict` with key = :class:`tuple` of chain names in + :attr:`ref` and value = :class:`tuple` of chain names in + :attr:`mdl`. + """ + + # NOTE: one could also allow computation of both penalized and unpenalized + # in same object -> must regenerate lddt_ref / lddt_mdl though + + def __init__(self, ref, mdl, alignments, calpha_only, settings, + penalize_extra_chains=False, chem_mapping=None): + # sanity checks + if chem_mapping is None and penalize_extra_chains: + raise RuntimeError("Must provide chem_mapping when requesting penalty " + "for extra chains!") + if not penalize_extra_chains: + # warn for unmapped model chains + unmapped_mdl_chains = self._GetUnmappedMdlChains(mdl, alignments) + if unmapped_mdl_chains: + LogWarning('MODEL contains chains unmapped to REFERENCE, ' + 'lDDT is not considering MODEL chains %s' \ + % str(list(unmapped_mdl_chains))) + # warn for unmapped reference chains + ref_chains = set(ch.name for ch in ref.chains) + mapped_ref_chains = set(aln.GetSequence(0).GetName() for aln in alignments) + unmapped_ref_chains = (ref_chains - mapped_ref_chains) + if unmapped_ref_chains: + LogWarning('REFERENCE contains chains unmapped to MODEL, ' + 'lDDT is not considering REFERENCE chains %s' \ + % str(list(unmapped_ref_chains))) + # prepare fields + self.ref = ref + self.mdl = mdl + self.alignments = alignments + self.calpha_only = calpha_only + self.settings = settings + self.penalize_extra_chains = penalize_extra_chains + self.chem_mapping = chem_mapping + self._sc_lddt = None + self._oligo_lddt = None + self._weighted_lddt = None + self._lddt_ref = None + self._lddt_mdl = None + self._oligo_lddt_scorer = None + self._mapped_lddt_scorers = None + self._ref_scorers = None + self._model_penalty = None + + @property + def oligo_lddt(self): + """Oligomeric lDDT score. + + The score is computed as conserved contacts divided by the total contacts + in the reference using the :attr:`oligo_lddt_scorer`, which uses the full + complex as reference/model structure. If :attr:`penalize_extra_chains` is + True, the reference/model complexes contain all chains (otherwise only the + mapped ones) and additional contacts are added to the reference's total + contacts for unmapped model chains according to the :attr:`chem_mapping`. + + The main difference with :attr:`weighted_lddt` is that the lDDT scorer + "sees" the full complex here (incl. inter-chain contacts), while the + weighted single chain score looks at each chain separately. + + :getter: Computed on first use (cached) + :type: :class:`float` + """ + if self._oligo_lddt is None: + LogInfo('Reference %s has: %s chains' \ + % (self.ref.GetName(), self.ref.chain_count)) + LogInfo('Model %s has: %s chains' \ + % (self.mdl.GetName(), self.mdl.chain_count)) + + # score with or w/o extra-chain penalty + if self.penalize_extra_chains: + denominator = self.oligo_lddt_scorer.total_contacts + denominator += self._GetModelPenalty() + if denominator > 0: + oligo_lddt = self.oligo_lddt_scorer.conserved_contacts \ + / float(denominator) + else: + oligo_lddt = 0.0 + else: + oligo_lddt = self.oligo_lddt_scorer.global_score + self._oligo_lddt = oligo_lddt + return self._oligo_lddt + + @property + def weighted_lddt(self): + """Weighted average of single chain lDDT scores. + + The score is computed as a weighted average of single chain lDDT scores + (see :attr:`sc_lddt_scorers`) using the total contacts of each single + reference chain as weights. If :attr:`penalize_extra_chains` is True, + unmapped chains are added with a 0 score and total contacts taken from + the actual reference chains or (for unmapped model chains) using the + :attr:`chem_mapping`. + + See :attr:`oligo_lddt` for a comparison of the two scores. + + :getter: Computed on first use (cached) + :type: :class:`float` + """ + if self._weighted_lddt is None: + scores = [s.global_score for s in self.sc_lddt_scorers] + weights = [s.total_contacts for s in self.sc_lddt_scorers] + nominator = sum([s * w for s, w in zip(scores, weights)]) + if self.penalize_extra_chains: + ref_scorers = self._GetRefScorers() + denominator = sum(s.total_contacts for s in ref_scorers.values()) + denominator += self._GetModelPenalty() + else: + denominator = sum(weights) + if denominator > 0: + self._weighted_lddt = nominator / float(denominator) + else: + self._weighted_lddt = 0.0 + return self._weighted_lddt + + @property + def lddt_ref(self): + """The reference entity used for oligomeric lDDT scoring + (:attr:`oligo_lddt` / :attr:`oligo_lddt_scorer`). + + Since the lDDT computation requires a single chain with mapped residue + numbering, all chains of :attr:`ref` are appended into a single chain X with + unique residue numbers according to the column-index in the alignment. The + alignments are in the same order as they appear in :attr:`alignments`. + Additional residues are appended at the end of the chain with unique residue + numbers. Unmapped chains are only added if :attr:`penalize_extra_chains` is + True. Only CA atoms are considered if :attr:`calpha_only` is True. + + :getter: Computed on first use (cached) + :type: :class:`~ost.mol.EntityHandle` + """ + if self._lddt_ref is None: + self._PrepareOligoEntities() + return self._lddt_ref + + @property + def lddt_mdl(self): + """The model entity used for oligomeric lDDT scoring + (:attr:`oligo_lddt` / :attr:`oligo_lddt_scorer`). + + Like :attr:`lddt_ref`, this is a single chain X containing all chains of + :attr:`mdl`. The residue numbers match the ones in :attr:`lddt_ref` where + aligned and have unique numbers for additional residues. + + :getter: Computed on first use (cached) + :type: :class:`~ost.mol.EntityHandle` + """ + if self._lddt_mdl is None: + self._PrepareOligoEntities() + return self._lddt_mdl + + @property + def oligo_lddt_scorer(self): + """lDDT Scorer object for :attr:`lddt_ref` and :attr:`lddt_mdl`. + + :getter: Computed on first use (cached) + :type: :class:`~ost.mol.alg.lDDTScorer` + """ + if self._oligo_lddt_scorer is None: + self._oligo_lddt_scorer = lDDTScorer( + references=[self.lddt_ref.Select("")], + model=self.lddt_mdl.Select(""), + settings=self.settings) + return self._oligo_lddt_scorer + + @property + def mapped_lddt_scorers(self): + """List of scorer objects for each chain mapped in :attr:`alignments`. + + :getter: Computed on first use (cached) + :type: :class:`list` of :class:`MappedLDDTScorer` + """ + if self._mapped_lddt_scorers is None: + self._mapped_lddt_scorers = list() + for aln in self.alignments: + mapped_lddt_scorer = MappedLDDTScorer(aln, self.calpha_only, + self.settings) + self._mapped_lddt_scorers.append(mapped_lddt_scorer) + return self._mapped_lddt_scorers + + @property + def sc_lddt_scorers(self): + """List of lDDT scorer objects extracted from :attr:`mapped_lddt_scorers`. + + :type: :class:`list` of :class:`~ost.mol.alg.lDDTScorer` + """ + return [mls.lddt_scorer for mls in self.mapped_lddt_scorers] + + @property + def sc_lddt(self): + """List of global scores extracted from :attr:`sc_lddt_scorers`. + + If scoring for a mapped chain fails, an error is displayed and a score of 0 + is assigned. + + :getter: Computed on first use (cached) + :type: :class:`list` of :class:`float` + """ + if self._sc_lddt is None: + self._sc_lddt = list() + for lddt_scorer in self.sc_lddt_scorers: + try: + self._sc_lddt.append(lddt_scorer.global_score) + except Exception as ex: + LogError('Single chain lDDT failed:', str(ex)) + self._sc_lddt.append(0.0) + return self._sc_lddt + + ############################################################################## + # Class internal helpers + ############################################################################## + + def _PrepareOligoEntities(self): + # simple wrapper to avoid code duplication + self._lddt_ref, self._lddt_mdl = _MergeAlignedChains( + self.alignments, self.ref, self.mdl, self.calpha_only, + self.penalize_extra_chains) + + @staticmethod + def _GetUnmappedMdlChains(mdl, alignments): + # assume model is second sequence in alignment and is named by chain + mdl_chains = set(ch.name for ch in mdl.chains) + mapped_mdl_chains = set(aln.GetSequence(1).GetName() for aln in alignments) + return (mdl_chains - mapped_mdl_chains) + + def _GetRefScorers(self): + # single chain lddt scorers for each reference chain (key = chain name) + if self._ref_scorers is None: + # collect from mapped_lddt_scorers + ref_scorers = dict() + for mapped_lddt_scorer in self.mapped_lddt_scorers: + ref_ch_name = mapped_lddt_scorer.reference_chain_name + ref_scorers[ref_ch_name] = mapped_lddt_scorer.lddt_scorer + # add new ones where needed + for ch in self.ref.chains: + if ch.name not in ref_scorers: + if self.calpha_only: + ref_chain = ch.Select('aname=CA') + else: + ref_chain = ch.Select('') + ref_scorers[ch.name] = lDDTScorer( + references=[ref_chain], + model=ref_chain, + settings=self.settings) + # store in cache + self._ref_scorers = ref_scorers + # fetch from cache + return self._ref_scorers + + def _GetModelPenalty(self): + # extra value to add to total number of distances for extra model chains + # -> estimated from chem-mapped reference chains + if self._model_penalty is None: + # sanity check + if self.chem_mapping is None: + raise RuntimeError("Must provide chem_mapping when requesting penalty " + "for extra model chains!") + # get cached ref_scorers + ref_scorers = self._GetRefScorers() + # get unmapped model chains + unmapped_mdl_chains = self._GetUnmappedMdlChains(self.mdl, self.alignments) + # map extra chains to ref. chains + model_penalty = 0 + for ch_name_mdl in sorted(unmapped_mdl_chains): + # get penalty for chain + cur_penalty = None + for cm_ref, cm_mdl in self.chem_mapping.iteritems(): + if ch_name_mdl in cm_mdl: + # penalize by an average of the chem. mapped ref. chains + cur_penalty = 0 + for ch_name_ref in cm_ref: + # assumes that total_contacts is cached (for speed) + cur_penalty += ref_scorers[ch_name_ref].total_contacts + cur_penalty /= float(len(cm_ref)) + break + # report penalty + if cur_penalty is None: + LogWarning('Extra MODEL chain %s could not be chemically mapped to ' + 'any chain in REFERENCE, lDDT cannot consider it!' \ + % ch_name_mdl) + else: + LogScript('Extra MODEL chain %s added to lDDT score by considering ' + 'chemically mapped chains in REFERENCE.' % ch_name_mdl) + model_penalty += cur_penalty + # store in cache + self._model_penalty = model_penalty + # fetch from cache + return self._model_penalty + + +class MappedLDDTScorer(object): + """A simple class to calculate a single-chain lDDT score on a given chain to + chain mapping as extracted from :class:`OligoLDDTScorer`. + + :param alignment: Sets :attr:`alignment` + :param calpha_only: Sets :attr:`calpha_only` + :param settings: Sets :attr:`settings` + + .. attribute:: alignment + + Alignment with two sequences named according to the mapped chains and with + views attached to both sequences (e.g. one of the items of + :attr:`QSscorer.alignments`). + + The first sequence is assumed to be the reference and the second one the + model. Since the lDDT score is not symmetric (extra residues in model are + ignored), the order is important. + + :type: :class:`~ost.seq.AlignmentHandle` + + .. attribute:: calpha_only + + If True, restricts lDDT score to CA only. + + :type: :class:`bool` + + .. attribute:: settings + + Settings to use for lDDT scoring. + + :type: :class:`~ost.mol.alg.lDDTSettings` + + .. attribute:: lddt_scorer + + lDDT Scorer object for the given chains. + + :type: :class:`~ost.mol.alg.lDDTScorer` + + .. attribute:: reference_chain_name + + Chain name of the reference. + + :type: :class:`str` + + .. attribute:: model_chain_name + + Chain name of the model. + + :type: :class:`str` + """ + def __init__(self, alignment, calpha_only, settings): + # prepare fields + self.alignment = alignment + self.calpha_only = calpha_only + self.settings = settings + self.lddt_scorer = None # set in _InitScorer + self.reference_chain_name = alignment.sequences[0].name + self.model_chain_name = alignment.sequences[1].name + self._old_number_label = "old_num" + self._extended_alignment = None # set in _InitScorer + # initialize lDDT scorer + self._InitScorer() + + def GetPerResidueScores(self): + """ + :return: Scores for each residue + :rtype: :class:`list` of :class:`dict` with one item for each residue + existing in model and reference: + + - "residue_number": Residue number in reference chain + - "residue_name": Residue number in reference chain + - "lddt": local lDDT + - "conserved_contacts": number of conserved contacts + - "total_contacts": total number of contacts + """ + scores = list() + assigned_residues = list() + # Make sure the score is calculated + self.lddt_scorer.global_score + for col in self._extended_alignment: + if col[0] != "-" and col.GetResidue(3).IsValid(): + ref_res = col.GetResidue(0) + mdl_res = col.GetResidue(1) + ref_res_renum = col.GetResidue(2) + mdl_res_renum = col.GetResidue(3) + if ref_res.one_letter_code != ref_res_renum.one_letter_code: + raise RuntimeError("Reference residue name mapping inconsistent: %s != %s" % + (ref_res.one_letter_code, + ref_res_renum.one_letter_code)) + if mdl_res.one_letter_code != mdl_res_renum.one_letter_code: + raise RuntimeError("Model residue name mapping inconsistent: %s != %s" % + (mdl_res.one_letter_code, + mdl_res_renum.one_letter_code)) + if ref_res.GetNumber().num != ref_res_renum.GetIntProp(self._old_number_label): + raise RuntimeError("Reference residue number mapping inconsistent: %s != %s" % + (ref_res.GetNumber().num, + ref_res_renum.GetIntProp(self._old_number_label))) + if mdl_res.GetNumber().num != mdl_res_renum.GetIntProp(self._old_number_label): + raise RuntimeError("Model residue number mapping inconsistent: %s != %s" % + (mdl_res.GetNumber().num, + mdl_res_renum.GetIntProp(self._old_number_label))) + if ref_res.qualified_name in assigned_residues: + raise RuntimeError("Duplicated residue in reference: " % + (ref_res.qualified_name)) + else: + assigned_residues.append(ref_res.qualified_name) + # check if property there (may be missing for CA-only) + if mdl_res_renum.HasProp(self.settings.label): + scores.append({ + "residue_number": ref_res.GetNumber().num, + "residue_name": ref_res.name, + "lddt": mdl_res_renum.GetFloatProp(self.settings.label), + "conserved_contacts": mdl_res_renum.GetFloatProp(self.settings.label + "_conserved"), + "total_contacts": mdl_res_renum.GetFloatProp(self.settings.label + "_total")}) + return scores + + ############################################################################## + # Class internal helpers (anything that doesnt easily work without this class) + ############################################################################## + + def _InitScorer(self): + # Use copy of alignment (extended by 2 extra sequences for renumbering) + aln = self.alignment.Copy() + # Get chains and renumber according to alignment (for lDDT) + reference = Renumber( + aln.GetSequence(0), + old_number_label=self._old_number_label).CreateFullView() + refseq = seq.CreateSequence( + "reference_renumbered", + aln.GetSequence(0).GetString()) + refseq.AttachView(reference) + aln.AddSequence(refseq) + model = Renumber( + aln.GetSequence(1), + old_number_label=self._old_number_label).CreateFullView() + modelseq = seq.CreateSequence( + "model_renumbered", + aln.GetSequence(1).GetString()) + modelseq.AttachView(model) + aln.AddSequence(modelseq) + # Filter to CA-only if desired (done after AttachView to not mess it up) + if self.calpha_only: + self.lddt_scorer = lDDTScorer( + references=[reference.Select('aname=CA')], + model=model.Select('aname=CA'), + settings=self.settings) + else: + self.lddt_scorer = lDDTScorer( + references=[reference], + model=model, + settings=self.settings) + # Store alignment for later + self._extended_alignment = aln ############################################################################### # HELPERS @@ -909,7 +1402,7 @@ def _AlignAtomSeqs(seq_1, seq_2): """ :type seq_1: :class:`ost.seq.SequenceHandle` :type seq_2: :class:`ost.seq.SequenceHandle` - :return: Alignment of two sequences using a global aignment. Views attached + :return: Alignment of two sequences using a global alignment. Views attached to the input sequences will remain attached in the aln. :rtype: :class:`~ost.seq.AlignmentHandle` or None if it failed. """ @@ -924,6 +1417,28 @@ def _AlignAtomSeqs(seq_1, seq_2): LogWarning('%s: %s' % (seq_2.name, seq_2.string)) return aln +def _FixSelectChainName(ch_name): + """ + :return: String to be used with Select(cname=<RETURN>). Takes care of putting + quotation marks where needed. + :rtype: :class:`str` + :param ch_name: Single chain name (:class:`str`). + """ + if ch_name in ['-', '_', ' ']: + return '"%c"' % ch_name + else: + return ch_name + +def _FixSelectChainNames(ch_names): + """ + :return: String to be used with Select(cname=<RETURN>). Takes care of joining + and putting quotation marks where needed. + :rtype: :class:`str` + :param ch_names: Some iterable list of chain names (:class:`str` items). + """ + chain_set = set([_FixSelectChainName(ch_name) for ch_name in ch_names]) + return ','.join(chain_set) + # QS entity def _CleanInputEntity(ent): @@ -947,13 +1462,7 @@ def _CleanInputEntity(ent): # remove them from *ent* if removed_chains: - chain_set = set() - for ch_name in removed_chains: - if ch_name in ['-', '_', ' ']: - chain_set.add('"%c"' % ch_name) - else: - chain_set.add(ch_name) - view = ent.Select('cname!=%s' % ','.join(chain_set)) + view = ent.Select('cname!=%s' % _FixSelectChainNames(removed_chains)) ent_new = mol.CreateEntityFromView(view, True) ent_new.SetName(ent.GetName()) else: @@ -1130,8 +1639,8 @@ def _GetChemGroupsMapping(qs_ent_1, qs_ent_2): # check if we have any chains left LogInfo('Chemical chain-groups mapping: ' + str(chem_mapping)) - if len(mapped_1) < 2 or len(mapped_2) < 2: - raise QSscoreError('Less than 2 chains left in chem_mapping.') + if len(mapped_1) < 1 or len(mapped_2) < 1: + raise QSscoreError('Less than 1 chains left in chem_mapping.') return chem_mapping def _SelectFew(l, max_elements): @@ -1160,6 +1669,13 @@ def _GetAlignedResidues(qs_ent_1, qs_ent_2, chem_mapping, max_ca_per_chain, :param chem_mapping: See :attr:`QSscorer.chem_mapping` :param max_ca_per_chain: See :attr:`QSscorer.max_ca_per_chain_for_cm` """ + # make sure name doesn't contain spaces and is unique + def _FixName(seq_name, seq_names): + # get rid of spaces and make it unique + seq_name = seq_name.replace(' ', '-') + while seq_name in seq_names: + seq_name += '-' + return seq_name # resulting views into CA entities using CA chain sequences ent_view_1 = qs_ent_1.ca_entity.CreateEmptyView() ent_view_2 = qs_ent_2.ca_entity.CreateEmptyView() @@ -1173,12 +1689,12 @@ def _GetAlignedResidues(qs_ent_1, qs_ent_2, chem_mapping, max_ca_per_chain, seq_to_empty_view = dict() for ch in group_1: sequence = ca_chains_1[ch].Copy() - sequence.name = qs_ent_1.GetName() + '.' + ch + sequence.name = _FixName(qs_ent_1.GetName() + '.' + ch, seq_to_empty_view) seq_to_empty_view[sequence.name] = ent_view_1 seq_list.AddSequence(sequence) for ch in group_2: sequence = ca_chains_2[ch].Copy() - sequence.name = qs_ent_2.GetName() + '.' + ch + sequence.name = _FixName(qs_ent_2.GetName() + '.' + ch, seq_to_empty_view) seq_to_empty_view[sequence.name] = ent_view_2 seq_list.AddSequence(sequence) alnc = ClustalW(seq_list, clustalw=clustalw_bin) @@ -1248,8 +1764,8 @@ def _FindSymmetry(qs_ent_1, qs_ent_2, ent_to_cm_1, ent_to_cm_2, chem_mapping): for _, symm_1, symm_2 in sorted(best_symm): s1 = symm_1[0] s2 = symm_2[0] - group_1 = ent_to_cm_1.Select('cname=%s' % ','.join(s1)) - group_2 = ent_to_cm_2.Select('cname=%s' % ','.join(s2)) + group_1 = ent_to_cm_1.Select('cname=%s' % _FixSelectChainNames(s1)) + group_2 = ent_to_cm_2.Select('cname=%s' % _FixSelectChainNames(s2)) # check if by superposing a pair of chains within the symmetry group to # superpose all chains within the symmetry group # -> if successful, the symmetry groups are compatible @@ -1628,7 +2144,8 @@ def _GetClosestChainInterface(ent, ref_chain, chains): # inaccurate. Also it could be extracted from QSscoreEntity.contacts. closest = [] for ch in chains: - iface_view = ent.Select('cname=%s and 10 <> [cname=%s]' % (ref_chain, ch)) + iface_view = ent.Select('cname="%s" and 10 <> [cname="%s"]' \ + % (ref_chain, ch)) nr_res = iface_view.residue_count closest.append((nr_res, ch)) closest_chain = max(closest)[1] @@ -1840,8 +2357,8 @@ def _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2, chem_mapping, # to superpose the full oligomer (e.g. if some chains are open/closed) for c1, c2 in itertools.product(g1, g2): # get superposition transformation - chain_1 = ent_1.Select('cname=%s' % c1) - chain_2 = ent_2.Select('cname=%s' % c2) + chain_1 = ent_1.Select('cname="%s"' % c1) + chain_2 = ent_2.Select('cname="%s"' % c2) res = mol.alg.SuperposeSVD(chain_1, chain_2, apply_transform=False) # look for overlaps mapping = _GetSuperpositionMapping(ent_1, ent_2, chem_mapping, @@ -1945,8 +2462,8 @@ def _GetMappedRMSD(ent_1, ent_2, chain_mapping, transformation): atoms = [] for c1, c2 in chain_mapping.iteritems(): # get views and atom counts - chain_1 = ent_1.Select('cname=%s' % c1) - chain_2 = ent_2.Select('cname=%s' % c2) + chain_1 = ent_1.Select('cname="%s"' % c1) + chain_2 = ent_2.Select('cname="%s"' % c2) atom_count = chain_1.atom_count if atom_count != chain_2.atom_count: raise RuntimeError('Chains in _GetMappedRMSD must be perfectly aligned!') @@ -1978,13 +2495,13 @@ class _CachedRMSD: def GetChainView1(self, cname): """Get cached view on chain *cname* for :attr:`ent_1`.""" if cname not in self._chain_views_1: - self._chain_views_1[cname] = self.ent_1.Select('cname=%s' % cname) + self._chain_views_1[cname] = self.ent_1.Select('cname="%s"' % cname) return self._chain_views_1[cname] def GetChainView2(self, cname): """Get cached view on chain *cname* for :attr:`ent_2`.""" if cname not in self._chain_views_2: - self._chain_views_2[cname] = self.ent_2.Select('cname=%s' % cname) + self._chain_views_2[cname] = self.ent_2.Select('cname="%s"' % cname) return self._chain_views_2[cname] def GetSuperposition(self, c1, c2): @@ -2087,7 +2604,7 @@ def _AreValidSymmetries(symm_1, symm_2): return False return True -def _GetMappedAlignments(ent_1, ent_2, chain_mapping): +def _GetMappedAlignments(ent_1, ent_2, chain_mapping, res_num_alignment): """ :return: Alignments of 2 structures given chain mapping (see :attr:`QSscorer.alignments`). @@ -2096,17 +2613,39 @@ def _GetMappedAlignments(ent_1, ent_2, chain_mapping): :param ent_2: Entity containing all chains in *chain_mapping.values()*. Views to this entity attached to second sequence of each aln. :param chain_mapping: See :attr:`QSscorer.chain_mapping` + :param res_num_alignment: See :attr:`QSscorer.res_num_alignment` """ - alns = [] + alns = list() for ch_1_name in sorted(chain_mapping): # get both sequences incl. attached view ch_1 = ent_1.FindChain(ch_1_name) - seq_1 = seq.SequenceFromChain(ch_1.name, ch_1) ch_2 = ent_2.FindChain(chain_mapping[ch_1_name]) - seq_2 = seq.SequenceFromChain(ch_2.name, ch_2) - # align them - aln = _AlignAtomSeqs(seq_1, seq_2) - if aln: alns.append(aln) + if res_num_alignment: + max_res_num = max([r.number.GetNum() for r in ch_1.residues] + + [r.number.GetNum() for r in ch_2.residues]) + ch1_aln = ["-"] * max_res_num + ch2_aln = ["-"] * max_res_num + for res in ch_1.residues: + ch1_aln[res.number.GetNum() - 1] = res.GetOneLetterCode() + ch1_aln = "".join(ch1_aln) + seq_1 = seq.CreateSequence(ch_1.name, str(ch1_aln)) + seq_1.AttachView(ch_1.Select("")) + for res in ch_2.residues: + ch2_aln[res.number.GetNum() - 1] = res.GetOneLetterCode() + ch2_aln = "".join(ch2_aln) + seq_2 = seq.CreateSequence(ch_2.name, str(ch2_aln)) + seq_2.AttachView(ch_2.Select("")) + # Create alignment + aln = seq.CreateAlignment() + aln.AddSequence(seq_1) + aln.AddSequence(seq_2) + else: + seq_1 = seq.SequenceFromChain(ch_1.name, ch_1) + seq_2 = seq.SequenceFromChain(ch_2.name, ch_2) + # align them + aln = _AlignAtomSeqs(seq_1, seq_2) + if aln: + alns.append(aln) return alns def _GetMappedResidues(alns): @@ -2301,7 +2840,7 @@ def _AddResidue(edi, res, rnum, chain, calpha_only): for atom in res.atoms: edi.InsertAtom(new_res, atom.name, atom.pos) -def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only): +def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only, penalize_extra_chains): """ Create two new entities (based on the alignments attached views) where all residues have same numbering (when they're aligned) and they are all pushed to @@ -2321,6 +2860,9 @@ def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only): :type ent_2: :class:`~ost.mol.EntityHandle` :param calpha_only: If True, we only include CA atoms instead of all. :type calpha_only: :class:`bool` + :param penalize_extra_chains: If True, extra chains are added to model and + reference. Otherwise, only mapped ones. + :type penalize_extra_chains: :class:`bool` :return: Tuple of two single chain entities (from *ent_1* and from *ent_2*) :rtype: :class:`tuple` of :class:`~ost.mol.EntityHandle` @@ -2349,19 +2891,20 @@ def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only): res_2 = col.GetResidue(1) if res_2.IsValid(): _AddResidue(ed_2, res_2, rnum, new_chain_2, calpha_only) - # extra chains - for chain in ent_1.chains: - if chain.name in chain_done_1: - continue - for res in chain.residues: - rnum += 1 - _AddResidue(ed_1, res, rnum, new_chain_1, calpha_only) - for chain in ent_2.chains: - if chain.name in chain_done_2: - continue - for res in chain.residues: - rnum += 1 - _AddResidue(ed_2, res, rnum, new_chain_2, calpha_only) + # extra chains? + if penalize_extra_chains: + for chain in ent_1.chains: + if chain.name in chain_done_1: + continue + for res in chain.residues: + rnum += 1 + _AddResidue(ed_1, res, rnum, new_chain_1, calpha_only) + for chain in ent_2.chains: + if chain.name in chain_done_2: + continue + for res in chain.residues: + rnum += 1 + _AddResidue(ed_2, res, rnum, new_chain_2, calpha_only) # get entity names ent_ren_1.SetName(aln.GetSequence(0).GetAttachedView().GetName()) ent_ren_2.SetName(aln.GetSequence(1).GetAttachedView().GetName()) @@ -2375,30 +2918,7 @@ def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only): ed_2.UpdateICS() return ent_ren_1, ent_ren_2 -def _ComputeLDDTScore(ref, mdl): - """ - :return: lDDT of *mdl* vs *ref* (see :attr:`QSscorer.lddt_score`). - :param mdl: Reference entity (see :attr:`QSscorer.lddt_mdl`) - :param ref: Model entity (see :attr:`QSscorer.lddt_ref`) - """ - # check input - LogInfo('Reference %s has: %s residues' % (ref.GetName(), ref.residue_count)) - LogInfo('Model %s has: %s residues' % (mdl.GetName(), mdl.residue_count)) - # get lddt score with fixed settings - lddt_score = mol.alg.LocalDistDiffTest(mdl.Select(''), ref.Select(''), - 2., 8., 'lddt') - LogInfo('lDDT score: %.3f' % lddt_score) - # add lDDT as B-factor to model - for r in mdl.residues: - if r.HasProp('lddt'): - for a in r.atoms: - a.SetBFactor(r.GetFloatProp('lddt')) - else: - for a in r.atoms: - a.SetBFactor(0.0) - - return lddt_score # specify public interface __all__ = ('QSscoreError', 'QSscorer', 'QSscoreEntity', 'FilterContacts', - 'GetContacts') + 'GetContacts', 'OligoLDDTScorer', 'MappedLDDTScorer') diff --git a/modules/mol/alg/pymod/wrap_mol_alg.cc b/modules/mol/alg/pymod/wrap_mol_alg.cc index 913b157748b3febce98b159a742b64afe41054fb..1c42af212a6b849d977d2afb4f43923bd042ed56 100644 --- a/modules/mol/alg/pymod/wrap_mol_alg.cc +++ b/modules/mol/alg/pymod/wrap_mol_alg.cc @@ -20,6 +20,7 @@ #include <boost/python.hpp> #include <boost/python/raw_function.hpp> #include <boost/python/suite/indexing/map_indexing_suite.hpp> +#include <ost/log.hh> #include <ost/config.hh> #include <ost/mol/alg/local_dist_diff_test.hh> #include <ost/mol/alg/distance_test_common.hh> @@ -122,18 +123,6 @@ object lDDTSettingsInitWrapper(tuple args, dict kwargs){ object self = args[0]; args = tuple(args.slice(1,_)); - Real bond_tolerance = 12.0; - if(kwargs.contains("bond_tolerance")){ - bond_tolerance = extract<Real>(kwargs["bond_tolerance"]); - kwargs["bond_tolerance"].del(); - } - - Real angle_tolerance = 12.0; - if(kwargs.contains("angle_tolerance")){ - angle_tolerance = extract<Real>(kwargs["angle_tolerance"]); - kwargs["angle_tolerance"].del(); - } - Real radius = 15.0; if(kwargs.contains("radius")){ radius = extract<Real>(kwargs["radius"]); @@ -146,30 +135,6 @@ object lDDTSettingsInitWrapper(tuple args, dict kwargs){ kwargs["sequence_separation"].del(); } - String sel = ""; - if(kwargs.contains("sel")){ - sel = extract<String>(kwargs["sel"]); - kwargs["sel"].del(); - } - - String parameter_file_path = ""; - if(kwargs.contains("parameter_file_path")){ - parameter_file_path = extract<String>(kwargs["parameter_file_path"]); - kwargs["parameter_file_path"].del(); - } - - bool structural_checks = true; - if(kwargs.contains("structural_checks")){ - structural_checks = extract<bool>(kwargs["structural_checks"]); - kwargs["structural_checks"].del(); - } - - bool consistency_checks = true; - if(kwargs.contains("consistency_checks")){ - consistency_checks = extract<bool>(kwargs["consistency_checks"]); - kwargs["consistency_checks"].del(); - } - std::vector<Real> cutoffs; if(kwargs.contains("cutoffs")){ list cutoff_list = extract<list>(kwargs["cutoffs"]); @@ -193,26 +158,66 @@ object lDDTSettingsInitWrapper(tuple args, dict kwargs){ if(len(kwargs) > 0){ std::stringstream ss; - ss << "Invalid keywords observed when setting up MolckSettings! "; + ss << "Invalid keywords observed when setting up lDDTSettings! "; ss << "Or did you pass the same keyword twice? "; - ss << "Valid keywords are: bond_tolerance, angle_tolerance, radius, "; - ss << "sequence_separation, sel, parameter_file_path, structural_checks, "; - ss << "consistency_checks, cutoffs, label!"; + ss << "Valid keywords are: radius, "; + ss << "sequence_separation, parameter_file_path, "; + ss << "cutoffs, label!"; throw std::invalid_argument(ss.str()); } - return self.attr("__init__")(bond_tolerance, - angle_tolerance, - radius, + return self.attr("__init__")(radius, sequence_separation, - sel, - parameter_file_path, - structural_checks, - consistency_checks, cutoffs, label); } +object lDDTScorerInitWrapper(tuple args, dict kwargs){ + + object self = args[0]; + args = tuple(args.slice(1, len(args))); + + std::vector<ost::mol::EntityView> reference_list_vector; + if(kwargs.contains("references")){ + list reference_list = boost::python::extract<list>(kwargs["references"]); + int reference_list_length = boost::python::extract<int>(reference_list.attr("__len__")()); + for (int i=0; i<reference_list_length; i++) { + reference_list_vector.push_back(boost::python::extract<ost::mol::EntityView>(reference_list[i])); + } + kwargs["references"].del(); + } else { + throw std::invalid_argument("'references' argument is required"); + } + + ost::mol::EntityView model; + if(kwargs.contains("model")){ + model = boost::python::extract<ost::mol::EntityView>(kwargs["model"]); + kwargs["model"].del(); + } else { + throw std::invalid_argument("'model' argument is required"); + } + + ost::mol::alg::lDDTSettings settings; + if(kwargs.contains("settings")){ + settings = extract<ost::mol::alg::lDDTSettings>(kwargs["settings"]); + kwargs["settings"].del(); + } else { + throw std::invalid_argument("'settings' argument is required"); + } + + if(len(kwargs) > 0){ + std::stringstream ss; + ss << "Invalid keywords observed when setting up lDDTScorer! "; + ss << "Or did you pass the same keyword twice? "; + ss << "Valid keywords are: references, model and settings!"; + throw std::invalid_argument(ss.str()); + } + + return self.attr("__init__")(reference_list_vector, + model, + settings); +} + void clean_lddt_references_wrapper(const list& reference_list) { @@ -226,7 +231,10 @@ void clean_lddt_references_wrapper(const list& reference_list) return ost::mol::alg::CleanlDDTReferences(reference_list_vector); } -ost::mol::alg::GlobalRDMap prepare_lddt_global_rdmap_wrapper(const list& reference_list, mol::alg::lDDTSettings settings) +ost::mol::alg::GlobalRDMap prepare_lddt_global_rdmap_wrapper(const list& reference_list, + list& cutoff_list, + int sequence_separation, + Real max_dist) { int reference_list_length = boost::python::extract<int>(reference_list.attr("__len__")()); std::vector<ost::mol::EntityView> reference_list_vector(reference_list_length); @@ -234,14 +242,22 @@ ost::mol::alg::GlobalRDMap prepare_lddt_global_rdmap_wrapper(const list& referen for (int i=0; i<reference_list_length; i++) { reference_list_vector[i] = boost::python::extract<ost::mol::EntityView>(reference_list[i]); } + + int cutoff_list_length = boost::python::extract<int>(cutoff_list.attr("__len__")()); + std::vector<Real> cutoff_list_vector(cutoff_list_length); + + for (int i=0; i<cutoff_list_length; i++) { + cutoff_list_vector[i] = boost::python::extract<Real>(cutoff_list[i]); + } - return mol::alg::PreparelDDTGlobalRDMap(reference_list_vector, settings); + return mol::alg::PreparelDDTGlobalRDMap(reference_list_vector, cutoff_list_vector, sequence_separation, max_dist); } -list get_lddt_per_residue_stats_wrapper(mol::EntityHandle& model, +list get_lddt_per_residue_stats_wrapper(mol::EntityView& model, ost::mol::alg::GlobalRDMap& distance_list, - ost::mol::alg::lDDTSettings& settings) { - std::vector<mol::alg::lDDTLocalScore> scores = GetlDDTPerResidueStats(model, distance_list, settings); + bool structural_checks, + String label) { + std::vector<mol::alg::lDDTLocalScore> scores = GetlDDTPerResidueStats(model, distance_list, structural_checks, label); list local_scores_list; for (std::vector<mol::alg::lDDTLocalScore>::const_iterator sit = scores.begin(); sit != scores.end(); ++sit) { local_scores_list.append(*sit); @@ -249,7 +265,26 @@ list get_lddt_per_residue_stats_wrapper(mol::EntityHandle& model, return local_scores_list; } -void print_lddt_per_residue_stats_wrapper(list& scores, mol::alg::lDDTSettings settings){ +list get_local_scores_wrapper(mol::alg::lDDTScorer& scorer) { + std::vector<mol::alg::lDDTLocalScore> scores = scorer.GetLocalScores(); + list local_scores_list; + for (std::vector<mol::alg::lDDTLocalScore>::const_iterator sit = scores.begin(); sit != scores.end(); ++sit) { + local_scores_list.append(*sit); + } + return local_scores_list; +} + +list get_references_wrapper(mol::alg::lDDTScorer& scorer) { + std::vector<mol::EntityView> references = scorer.GetReferences(); + list local_references_list; + for (std::vector<mol::EntityView>::const_iterator sit = references.begin(); sit != references.end(); ++sit) { + local_references_list.append(*sit); + } + return local_references_list; +} + + +void print_lddt_per_residue_stats_wrapper(list& scores, bool structural_checks, int cutoffs_size){ int scores_length = boost::python::extract<int>(scores.attr("__len__")()); std::vector<mol::alg::lDDTLocalScore> scores_vector(scores_length); @@ -257,7 +292,7 @@ void print_lddt_per_residue_stats_wrapper(list& scores, mol::alg::lDDTSettings s scores_vector[i] = boost::python::extract<mol::alg::lDDTLocalScore>(scores[i]); } - return mol::alg::PrintlDDTPerResidueStats(scores_vector, settings); + return mol::alg::PrintlDDTPerResidueStats(scores_vector, structural_checks, cutoffs_size); } } @@ -328,26 +363,21 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) class_<mol::alg::lDDTSettings>("lDDTSettings", no_init) .def("__init__", raw_function(lDDTSettingsInitWrapper)) - .def(init<Real, Real, Real, int, String, String, bool, bool, std::vector<Real>&, String>()) + .def(init<Real, int, std::vector<Real>&, String>()) .def("ToString", &mol::alg::lDDTSettings::ToString) - .def("SetStereoChemicalParamsPath", &mol::alg::lDDTSettings::SetStereoChemicalParamsPath) .def("PrintParameters", &mol::alg::lDDTSettings::PrintParameters) .def("__repr__", &mol::alg::lDDTSettings::ToString) .def("__str__", &mol::alg::lDDTSettings::ToString) - .def_readwrite("bond_tolerance", &mol::alg::lDDTSettings::bond_tolerance) - .def_readwrite("angle_tolerance", &mol::alg::lDDTSettings::angle_tolerance) .def_readwrite("radius", &mol::alg::lDDTSettings::radius) .def_readwrite("sequence_separation", &mol::alg::lDDTSettings::sequence_separation) - .def_readwrite("sel", &mol::alg::lDDTSettings::sel) - .def_readwrite("parameter_file_path", &mol::alg::lDDTSettings::parameter_file_path) - .def_readwrite("structural_checks", &mol::alg::lDDTSettings::structural_checks) - .def_readwrite("consistency_checks", &mol::alg::lDDTSettings::consistency_checks) .def_readwrite("cutoffs", &mol::alg::lDDTSettings::cutoffs) .def_readwrite("label", &mol::alg::lDDTSettings::label); class_<mol::alg::lDDTLocalScore>("lDDTLocalScore", init<String, String, int, String, String, Real, int, int>()) .def("ToString", &mol::alg::lDDTLocalScore::ToString) .def("GetHeader", &mol::alg::lDDTLocalScore::GetHeader) + .def("__str__", &mol::alg::lDDTLocalScore::Repr) + .def("__repr__", &mol::alg::lDDTLocalScore::Repr) .def_readwrite("cname", &mol::alg::lDDTLocalScore::cname) .def_readwrite("rname", &mol::alg::lDDTLocalScore::rname) .def_readwrite("rnum", &mol::alg::lDDTLocalScore::rnum) @@ -356,6 +386,26 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) .def_readwrite("local_lddt", &mol::alg::lDDTLocalScore::local_lddt) .def_readwrite("conserved_dist", &mol::alg::lDDTLocalScore::conserved_dist) .def_readwrite("total_dist", &mol::alg::lDDTLocalScore::total_dist); + + class_<mol::alg::lDDTScorer>("lDDTScorer", no_init) + .def("__init__", raw_function(lDDTScorerInitWrapper)) + .def(init<std::vector<mol::EntityView>&, mol::EntityView&, mol::alg::lDDTSettings&>()) + .add_property("global_score", &mol::alg::lDDTScorer::GetGlobalScore) + .add_property("conserved_contacts", &mol::alg::lDDTScorer::GetNumConservedContacts) + .add_property("total_contacts", &mol::alg::lDDTScorer::GetNumTotalContacts) + .def("PrintPerResidueStats", &mol::alg::lDDTScorer::PrintPerResidueStats) + .add_property("local_scores", &get_local_scores_wrapper) + .def_readonly("model", &mol::alg::lDDTScorer::model_view) + .add_property("references", &get_references_wrapper) + .add_property("is_valid", &mol::alg::lDDTScorer::IsValid); + + class_<mol::alg::StereoChemicalProps>("StereoChemicalProps", + init<mol::alg::StereoChemicalParams&, + mol::alg::StereoChemicalParams&, + mol::alg::ClashingDistances&>()) + .def_readwrite("bond_table", &mol::alg::StereoChemicalProps::bond_table) + .def_readwrite("angle_table", &mol::alg::StereoChemicalProps::angle_table) + .def_readwrite("nonbonded_table", &mol::alg::StereoChemicalProps::nonbonded_table); def("FillClashingDistances",&fill_clashing_distances_wrapper); def("FillStereoChemicalParams",&fill_stereochemical_params_wrapper); @@ -365,17 +415,17 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) def("CleanlDDTReferences", &clean_lddt_references_wrapper); def("PreparelDDTGlobalRDMap", &prepare_lddt_global_rdmap_wrapper, - (arg("reference_list"), arg("settings"))); + (arg("reference_list"), arg("cutoffs"), arg("sequence_separation"), arg("radius"))); def("CheckStructure", &mol::alg::CheckStructure, (arg("ent"), arg("bond_table"), arg("angle_table"), arg("nonbonded_table"), arg("bond_tolerance"), arg("angle_tolerance"))); def("GetlDDTPerResidueStats", &get_lddt_per_residue_stats_wrapper, - (arg("model"), arg("distance_list"), arg("settings"))); + (arg("model"), arg("distance_list"), arg("structural_checks"), arg("label"))); def("PrintlDDTPerResidueStats", &print_lddt_per_residue_stats_wrapper, - (arg("scores"), arg("settings"))); + (arg("scores"), arg("structural_checks"), arg("cutoff_list_length"))); class_<mol::alg::PDBize>("PDBize", diff --git a/modules/mol/alg/src/lddt.cc b/modules/mol/alg/src/lddt.cc index 60156b86b4515c773994cd3031252915525c07fe..4a9c095f53eff72b5d5ed9dedebd8c7e28761fb4 100644 --- a/modules/mol/alg/src/lddt.cc +++ b/modules/mol/alg/src/lddt.cc @@ -157,7 +157,12 @@ int main (int argc, char **argv) // sets some default values for parameters String version = OST_VERSION_STRING; lDDTSettings settings; - settings.structural_checks=false; + String parameter_file_path; + bool structural_checks = false; + bool ignore_consistency_checks = false; + Real bond_tolerance = 12.0; + Real angle_tolerance = 12.0; + String sel; // creates the required loading profile IOProfile profile; // parses options @@ -165,7 +170,7 @@ int main (int argc, char **argv) po::options_description desc("Options"); desc.add_options() ("calpha,c", "consider only calpha atoms") - ("sel,s", po::value<String>(&settings.sel)->default_value(""), "selection performed on reference structure") + ("sel,s", po::value<String>(&sel)->default_value(""), "selection performed on reference structure") ("tolerant,t", "fault tolerant mode") ("structural-checks,f", "perform stereo-chemical and clash checks") ("ignore-consistency-checks,x", "ignore residue name consistency checks") @@ -214,18 +219,18 @@ int main (int argc, char **argv) profile.calpha_only=true; } if (vm.count("structural-checks")) { - settings.structural_checks=true; + structural_checks=true; } if (vm.count("ignore-consistency-checks")) { - settings.consistency_checks=false; + ignore_consistency_checks=true; } if (vm.count("tolerant")) { profile.fault_tolerant=true; } if (vm.count("parameter-file")) { - settings.parameter_file_path=vm["parameter-file"].as<String>(); - } else if (settings.structural_checks==true) { + parameter_file_path=vm["parameter-file"].as<String>(); + } else if (structural_checks==true) { std::cout << "Please specify a stereo-chemical parameter file" << std::endl; exit(-1); } @@ -248,10 +253,10 @@ int main (int argc, char **argv) Logger::Instance().PushVerbosityLevel(ost_verbosity_level); if (vm.count("bond_tolerance")) { - settings.bond_tolerance=vm["bond_tolerance"].as<Real>(); + bond_tolerance=vm["bond_tolerance"].as<Real>(); } if (vm.count("angle_tolerance")) { - settings.angle_tolerance=vm["angle_tolerance"].as<Real>(); + angle_tolerance=vm["angle_tolerance"].as<Real>(); } if (vm.count("inclusion_radius")) { settings.radius=vm["inclusion_radius"].as<Real>(); @@ -276,10 +281,10 @@ int main (int argc, char **argv) if (!ref) { exit(-1); } - if (settings.sel != ""){ - std::cout << "Performing \"" << settings.sel << "\" selection on reference " << ref_filename << std::endl; + if (sel != ""){ + std::cout << "Performing \"" << sel << "\" selection on reference " << ref_filename << std::endl; try { - ref_list.push_back(ref.Select(settings.sel)); + ref_list.push_back(ref.Select(sel)); } catch (const ost::mol::QueryError& e) { std::cerr << "Provided selection argument failed." << std::endl << e.GetFormattedMessage() << std::endl; exit(-1); @@ -290,13 +295,21 @@ int main (int argc, char **argv) } } CleanlDDTReferences(ref_list); - glob_dist_list = PreparelDDTGlobalRDMap(ref_list, settings); + if (ref_list.size()==1) { + std::cout << "Multi-reference mode: Off" << std::endl; + } else { + std::cout << "Multi-reference mode: On" << std::endl; + } + glob_dist_list = PreparelDDTGlobalRDMap(ref_list, + settings.cutoffs, + settings.sequence_separation, + settings.radius); files.pop_back(); // prints out parameters used in the lddt calculation std::cout << "Verbosity level: " << verbosity_level << std::endl; settings.PrintParameters(); - if (settings.structural_checks) { + if (structural_checks) { LOG_INFO("Log entries format:"); LOG_INFO("BOND INFO FORMAT: Chain Residue ResNum Bond Min Max Observed Z-score Status"); LOG_INFO("ANGLE INFO FORMAT: Chain Residue ResNum Angle Min Max Observed Z-score Status"); @@ -325,10 +338,10 @@ int main (int argc, char **argv) String filestring=BFPathToString(pathstring); std::cout << "File: " << files[i] << std::endl; - if (settings.structural_checks) { - StereoChemicalParamsReader stereochemical_params(settings.parameter_file_path); + if (structural_checks) { + StereoChemicalProps stereochemical_params; try { - stereochemical_params.Read(true); + stereochemical_params = ost::io::ReadStereoChemicalPropsFile(parameter_file_path, true); } catch (std::exception& e) { std::cout << e.what() << std::endl; exit(-1); @@ -339,21 +352,35 @@ int main (int argc, char **argv) stereochemical_params.bond_table, stereochemical_params.angle_table, stereochemical_params.nonbonded_table, - settings.bond_tolerance, - settings.angle_tolerance); + bond_tolerance, + angle_tolerance); } catch (std::exception& e) { std::cout << e.what() << std::endl; exit(-1); } } + // Check consistency + for (std::vector<EntityView>::const_iterator ref_list_it = ref_list.begin(); + ref_list_it != ref_list.end(); ++ref_list_it) { + bool cons_check = ResidueNamesMatch(model_view,*ref_list_it, ignore_consistency_checks); + if (cons_check==false) { + if (ignore_consistency_checks==false) { + throw std::runtime_error("Residue names in model and in reference structure(s) are inconsistent."); + } else { + LOG_WARNING("Residue names in model and in reference structure(s) are inconsistent."); + } + } + } + // computes the lddt score - Real lddt = LocalDistDiffTest(model_view, ref_list, glob_dist_list, settings); + LocalDistDiffTest(model_view, ref_list, glob_dist_list, settings); // prints the residue-by-residue statistics std::vector<lDDTLocalScore> local_scores; - local_scores = GetlDDTPerResidueStats(model, glob_dist_list, settings); - PrintlDDTPerResidueStats(local_scores, settings); + EntityView outview = model.GetChainList()[0].Select("peptide=true"); + local_scores = GetlDDTPerResidueStats(outview, glob_dist_list, structural_checks, settings.label); + PrintlDDTPerResidueStats(local_scores, structural_checks, settings.cutoffs.size()); } return 0; } diff --git a/modules/mol/alg/src/local_dist_diff_test.cc b/modules/mol/alg/src/local_dist_diff_test.cc index 9c988d1005c1ca2614a48fbf74e7258a07caf135..de6ea31e6febb60862594f372891710e604539cd 100644 --- a/modules/mol/alg/src/local_dist_diff_test.cc +++ b/modules/mol/alg/src/local_dist_diff_test.cc @@ -7,6 +7,7 @@ #include <boost/concept_check.hpp> #include <boost/filesystem/convenience.hpp> #include <ost/mol/alg/consistency_checks.hh> +#include <ost/io/stereochemical_params_reader.hh> namespace ost { namespace mol { namespace alg { @@ -388,86 +389,43 @@ bool IsStandardResidue(String rn) return true; } return false; -} +} + +StereoChemicalProps::StereoChemicalProps(): + is_valid(false) {} + +StereoChemicalProps::StereoChemicalProps( + ost::mol::alg::StereoChemicalParams& init_bond_table, + ost::mol::alg::StereoChemicalParams& init_angle_table, + ost::mol::alg::ClashingDistances& init_nonbonded_table): + is_valid(true), + bond_table(init_bond_table), + angle_table(init_angle_table), + nonbonded_table(init_nonbonded_table) {} -lDDTSettings::lDDTSettings(): bond_tolerance(12.0), - angle_tolerance(12.0), - radius(15.0), +lDDTSettings::lDDTSettings(): radius(15.0), sequence_separation(0), - sel(""), - structural_checks(true), - consistency_checks(true), - label("localldt") { + label("locallddt") { cutoffs.push_back(0.5); cutoffs.push_back(1.0); cutoffs.push_back(2.0); cutoffs.push_back(4.0); - SetStereoChemicalParamsPath(); } -lDDTSettings::lDDTSettings(Real init_bond_tolerance, - Real init_angle_tolerance, - Real init_radius, +lDDTSettings::lDDTSettings(Real init_radius, int init_sequence_separation, - String init_sel, - String init_parameter_file_path, - bool init_structural_checks, - bool init_consistency_checks, std::vector<Real>& init_cutoffs, - String init_label): - bond_tolerance(init_bond_tolerance), - angle_tolerance(init_angle_tolerance), + String init_label): radius(init_radius), sequence_separation(init_sequence_separation), - sel(init_sel), - structural_checks(init_structural_checks), - consistency_checks(init_consistency_checks), cutoffs(init_cutoffs), - label(init_label) { - SetStereoChemicalParamsPath(init_parameter_file_path); -} - -void lDDTSettings::SetStereoChemicalParamsPath(const String& path) { - if (path == ""){ - try { - std::cerr << "Setting default stereochemical parameters file path." << std::endl; - parameter_file_path = ost::GetSharedDataPath() + "/stereo_chemical_props.txt"; - if (! structural_checks) { - std::cerr << "WARNING: Stereochemical parameters file is set but structural checks are disabled" << std::endl; - } - } catch (std::runtime_error& e) { - std::cerr << "WARNING: " << e.what() << std::endl; - std::cerr << "WARNING: Parameter file setting failed - structural check will be disabled" << std::endl; - structural_checks = false; - parameter_file_path = ""; - } - } else if (! boost::filesystem::exists(path)) { - std::cerr << "WARNING: Parameter file does not exist - structural check will be disabled" << std::endl; - structural_checks = false; - parameter_file_path = ""; - } else { - parameter_file_path = path; - structural_checks = true; - } -} + label(init_label) {} std::string lDDTSettings::ToString() { std::ostringstream rep; - if (structural_checks) { - rep << "Stereo-chemical and steric clash checks: On \n"; - } else { - rep << "Stereo-chemical and steric clash checks: Off \n"; - } - rep << "Inclusion Radius: " << radius << "\n"; rep << "Sequence separation: " << sequence_separation << "\n"; rep << "Cutoffs: " << vector_to_string(cutoffs) << "\n"; - - if (structural_checks) { - rep << "Parameter filename: " + parameter_file_path + "\n"; - rep << "Tolerance in stddevs for bonds: " << bond_tolerance << "\n"; - rep << "Tolerance in stddevs for angles: " << angle_tolerance << "\n"; - } rep << "Residue properties label: " << label << "\n"; return rep.str(); @@ -517,6 +475,12 @@ String lDDTLocalScore::ToString(bool structural_checks) const { return outstr.str(); } +String lDDTLocalScore::Repr() const { + std::stringstream outstr; + outstr << "<lDDTLocalScore " << cname << "." << rname << "." << rnum << ">"; + return outstr.str(); +} + String lDDTLocalScore::GetHeader(bool structural_checks, int cutoffs_length) { std::stringstream outstr; if (structural_checks) { @@ -527,6 +491,114 @@ String lDDTLocalScore::GetHeader(bool structural_checks, int cutoffs_length) { return outstr.str(); } +lDDTScorer::lDDTScorer(std::vector<EntityView>& init_references, + ost::mol::EntityView& init_model, + lDDTSettings& init_settings): + settings(init_settings), + model_view(init_model), + references_view(init_references) + { + _score_calculated = false; + _score_valid = false; + _has_local_scores = false; + _num_cons_con = -1; + _num_tot_con = -1; + _global_score = -1.0; + CleanlDDTReferences(references_view); + _PrepareGlobalRDMap(); + } + +Real lDDTScorer::GetGlobalScore(){ + if (!_score_calculated) { + _ComputelDDT(); + } + return _global_score; +} + +int lDDTScorer::GetNumConservedContacts(){ + if (!_score_calculated) { + _ComputelDDT(); + } + return _num_cons_con; +} + +int lDDTScorer::GetNumTotalContacts(){ + if (!_score_calculated) { + _ComputelDDT(); + } + return _num_tot_con; +} + +std::vector<lDDTLocalScore> lDDTScorer::GetLocalScores(){ + if (!_has_local_scores) { + _GetLocallDDT(); + } + return _local_scores; +} + +void lDDTScorer::PrintPerResidueStats(){ + if (!_has_local_scores) { + _GetLocallDDT(); + } + PrintlDDTPerResidueStats(_local_scores, + false, + settings.cutoffs.size()); +} + +std::vector<EntityView> lDDTScorer::GetReferences(){ + return references_view; +} + +void lDDTScorer::_PrepareGlobalRDMap(){ + glob_dist_list = PreparelDDTGlobalRDMap(references_view, + settings.cutoffs, + settings.sequence_separation, + settings.radius); +} + +bool lDDTScorer::IsValid(){ + return _score_valid; +} + +void lDDTScorer::_ComputelDDT(){ + std::pair<int,int> cov = ComputeCoverage(model_view,glob_dist_list); + if (cov.second == -1) { + LOG_INFO("Coverage: 0 (0 out of 0 residues)"); + } else { + std::stringstream sout; + sout << "Coverage: " << (float(cov.first)/float(cov.second)) << " (" << cov.first << " out of " << cov.second << " residues)"; + LOG_INFO(sout.str()); + } + + if (cov.first == 0) { + _num_tot_con = 0; + _num_cons_con = 0; + _global_score = 0.0; + _score_calculated = true; + _score_valid = false; + } + + std::pair<int,int> total_ov=alg::LocalDistDiffTest(model_view, glob_dist_list, settings.cutoffs, settings.sequence_separation, settings.label); + Real lddt = static_cast<Real>(total_ov.first)/(static_cast<Real>(total_ov.second) ? static_cast<Real>(total_ov.second) : 1); + _num_tot_con = total_ov.second ? total_ov.second : 1; + _num_cons_con = total_ov.first; + _global_score = lddt; + _score_calculated = true; + _score_valid = true; +} + +void lDDTScorer::_GetLocallDDT(){ + if (!_score_calculated){ + _ComputelDDT(); + } + _local_scores = GetlDDTPerResidueStats(model_view, + glob_dist_list, + false, // do not print structural checks + settings.label); + _has_local_scores = true; +} + + GlobalRDMap CreateDistanceList(const EntityView& ref,Real max_dist) { GlobalRDMap dist_list; @@ -666,17 +738,6 @@ Real LocalDistDiffTest(const EntityView& v, std::vector<EntityView>& ref_list, const GlobalRDMap& glob_dist_list, lDDTSettings& settings) { - for (std::vector<EntityView>::const_iterator ref_list_it = ref_list.begin(); - ref_list_it != ref_list.end(); ++ref_list_it) { - bool cons_check = ResidueNamesMatch(v,*ref_list_it,settings.consistency_checks); - if (cons_check==false) { - if (settings.consistency_checks==true) { - throw std::runtime_error("Residue names in model and in reference structure(s) are inconsistent."); - } else { - LOG_WARNING("Residue names in model and in reference structure(s) are inconsistent."); - } - } - } std::pair<int,int> cov = ComputeCoverage(v,glob_dist_list); if (cov.second == -1) { @@ -772,17 +833,17 @@ void CleanlDDTReferences(std::vector<EntityView>& ref_list){ } GlobalRDMap PreparelDDTGlobalRDMap(const std::vector<EntityView>& ref_list, - lDDTSettings& settings){ + std::vector<Real>& cutoff_list, + int sequence_separation, + Real max_dist){ GlobalRDMap glob_dist_list; if (ref_list.size()==1) { - std::cout << "Multi-reference mode: Off" << std::endl; - glob_dist_list = CreateDistanceList(ref_list[0], settings.radius); + glob_dist_list = CreateDistanceList(ref_list[0], max_dist); } else { - std::cout << "Multi-reference mode: On" << std::endl; glob_dist_list = CreateDistanceListFromMultipleReferences(ref_list, - settings.cutoffs, - settings.sequence_separation, - settings.radius); + cutoff_list, + sequence_separation, + max_dist); } return glob_dist_list; @@ -831,9 +892,12 @@ void CheckStructure(EntityView& ent, std::cout << "Distances shorter than tolerance are on average shorter by: " << std::fixed << std::setprecision(5) << clash_info.GetAverageOffset() << std::endl; } -std::vector<lDDTLocalScore> GetlDDTPerResidueStats(EntityHandle& model, GlobalRDMap& glob_dist_list, lDDTSettings& settings){ +std::vector<lDDTLocalScore> GetlDDTPerResidueStats(EntityView& model, + GlobalRDMap& glob_dist_list, + bool structural_checks, + String label){ std::vector<lDDTLocalScore> scores; - EntityView outv = model.GetChainList()[0].Select("peptide=true"); + EntityView outv = model; for (ChainViewList::const_iterator ci = outv.GetChainList().begin(), ce = outv.GetChainList().end(); ci != ce; ++ci) { for (ResidueViewList::const_iterator rit = ci->GetResidueList().begin(), @@ -844,7 +908,7 @@ std::vector<lDDTLocalScore> GetlDDTPerResidueStats(EntityHandle& model, GlobalRD bool assessed = false; String assessed_string="No"; String quality_problems_string; - if (settings.structural_checks) { + if (structural_checks) { quality_problems_string="No"; } else { quality_problems_string="NA"; @@ -867,15 +931,15 @@ std::vector<lDDTLocalScore> GetlDDTPerResidueStats(EntityHandle& model, GlobalRD } if (assessed==true) { - if (ritv.HasProp(settings.label)) { - lddt_local=ritv.GetFloatProp(settings.label); + if (ritv.HasProp(label)) { + lddt_local=ritv.GetFloatProp(label); std::stringstream stkeylddt; stkeylddt << std::fixed << std::setprecision(4) << lddt_local; lddt_local_string=stkeylddt.str(); - conserved_dist=ritv.GetIntProp(settings.label+"_conserved"); - total_dist=ritv.GetIntProp(settings.label+"_total"); + conserved_dist=ritv.GetIntProp(label+"_conserved"); + total_dist=ritv.GetIntProp(label+"_total"); } else { - std::cout << settings.label << std::endl; + //std::cout << label << std::endl; lddt_local = 0; lddt_local_string="0.0000"; conserved_dist = 0; @@ -898,8 +962,8 @@ std::vector<lDDTLocalScore> GetlDDTPerResidueStats(EntityHandle& model, GlobalRD return scores; } -void PrintlDDTPerResidueStats(std::vector<lDDTLocalScore>& scores, lDDTSettings& settings){ - if (settings.structural_checks) { +void PrintlDDTPerResidueStats(std::vector<lDDTLocalScore>& scores, bool structural_checks, int cutoffs_length){ + if (structural_checks) { std::cout << "Local LDDT Scores:" << std::endl; std::cout << "(A 'Yes' in the 'Quality Problems' column stands for problems" << std::endl; std::cout << "in the side-chain of a residue, while a 'Yes+' for problems" << std::endl; @@ -907,9 +971,9 @@ void PrintlDDTPerResidueStats(std::vector<lDDTLocalScore>& scores, lDDTSettings& } else { std::cout << "Local LDDT Scores:" << std::endl; } - std::cout << lDDTLocalScore::GetHeader(settings.structural_checks, settings.cutoffs.size()) << std::endl; + std::cout << lDDTLocalScore::GetHeader(structural_checks, cutoffs_length) << std::endl; for (std::vector<lDDTLocalScore>::const_iterator sit = scores.begin(); sit != scores.end(); ++sit) { - std::cout << sit->ToString(settings.structural_checks) << std::endl; + std::cout << sit->ToString(structural_checks) << std::endl; } std::cout << std::endl; } diff --git a/modules/mol/alg/src/local_dist_diff_test.hh b/modules/mol/alg/src/local_dist_diff_test.hh index 21aee5a1d7a035ae21450b05c84212bc103a1b9d..916fe4facd4f71dbc0aa672b824e4d6c84500c00 100644 --- a/modules/mol/alg/src/local_dist_diff_test.hh +++ b/modules/mol/alg/src/local_dist_diff_test.hh @@ -20,36 +20,37 @@ #define OST_MOL_ALG_LOCAL_DIST_TEST_HH #include <ost/mol/alg/module_config.hh> +#include <ost/mol/entity_handle.hh> #include <ost/seq/alignment_handle.hh> #include <ost/mol/alg/distance_test_common.hh> #include <ost/mol/alg/filter_clashes.hh> namespace ost { namespace mol { namespace alg { +struct StereoChemicalProps +{ + bool is_valid; + ost::mol::alg::StereoChemicalParams bond_table; + ost::mol::alg::StereoChemicalParams angle_table; + ost::mol::alg::ClashingDistances nonbonded_table; + + StereoChemicalProps(); + StereoChemicalProps(ost::mol::alg::StereoChemicalParams& init_bond_table, + ost::mol::alg::StereoChemicalParams& init_angle_table, + ost::mol::alg::ClashingDistances& init_nonbonded_table); +}; + struct lDDTSettings { - Real bond_tolerance; - Real angle_tolerance; Real radius; int sequence_separation; - String sel; - String parameter_file_path; - bool structural_checks; - bool consistency_checks; std::vector<Real> cutoffs; String label; lDDTSettings(); - lDDTSettings(Real init_bond_tolerance, - Real init_angle_tolerance, - Real init_radius, + lDDTSettings(Real init_radius, int init_sequence_separation, - String init_sel, - String init_parameter_file_path, - bool init_structural_checks, - bool init_consistency_checks, std::vector<Real>& init_cutoffs, String init_label); - void SetStereoChemicalParamsPath(const String& path=""); void PrintParameters(); std::string ToString(); }; @@ -76,10 +77,45 @@ struct lDDTLocalScore { int init_total_dist); String ToString(bool structural_checks) const; + String Repr() const; static String GetHeader(bool structural_checks, int cutoffs_length); }; +class lDDTScorer +{ + public: + lDDTSettings settings; + EntityView model_view; + std::vector<EntityView> references_view; + GlobalRDMap glob_dist_list; + + lDDTScorer(std::vector<EntityView>& init_references, + ost::mol::EntityView& init_model, + lDDTSettings& init_settings); + Real GetGlobalScore(); + std::vector<lDDTLocalScore> GetLocalScores(); + int GetNumConservedContacts(); // number of conserved distances in the model + int GetNumTotalContacts(); // the number of total distances in the reference structure + std::vector<EntityView> GetReferences(); + void PrintPerResidueStats(); + bool IsValid(); + + private: + bool _score_calculated; + bool _score_valid; + bool _has_local_scores; + // number of conserved distances in the model and + // the number of total distances in the reference structure + int _num_cons_con; + int _num_tot_con; + Real _global_score; + std::vector<lDDTLocalScore> _local_scores; + void _ComputelDDT(); + void _GetLocallDDT(); + void _PrepareGlobalRDMap(); +}; + std::pair<int,int> DLLEXPORT_OST_MOL_ALG ComputeCoverage(const EntityView& v,const GlobalRDMap& glob_dist_list); bool DLLEXPORT_OST_MOL_ALG IsResnumInGlobalRDMap(const ResNum& resnum, const GlobalRDMap& glob_dist_list); @@ -132,7 +168,7 @@ Real DLLEXPORT_OST_MOL_ALG LocalDistDiffTest(const EntityView& mdl, Real cutoff, Real max_dist, const String& local_ldt_property_string=""); -/// TODO document me +/// \brief Wrapper around LocalDistDiffTest Real DLLEXPORT_OST_MOL_ALG LocalDistDiffTest(const EntityView& v, std::vector<EntityView>& ref_list, const GlobalRDMap& glob_dist_list, @@ -205,7 +241,9 @@ void DLLEXPORT_OST_MOL_ALG CleanlDDTReferences(std::vector<EntityView>& ref_list // Prepare GlobalRDMap from reference list GlobalRDMap DLLEXPORT_OST_MOL_ALG PreparelDDTGlobalRDMap( const std::vector<EntityView>& ref_list, - lDDTSettings& settings); + std::vector<Real>& cutoff_list, + int sequence_separation, + Real max_dist); void DLLEXPORT_OST_MOL_ALG CheckStructure(EntityView& ent, StereoChemicalParams& bond_table, @@ -214,15 +252,15 @@ void DLLEXPORT_OST_MOL_ALG CheckStructure(EntityView& ent, Real bond_tolerance, Real angle_tolerance); -std::vector<lDDTLocalScore> DLLEXPORT_OST_MOL_ALG GetlDDTPerResidueStats(EntityHandle& model, +std::vector<lDDTLocalScore> DLLEXPORT_OST_MOL_ALG GetlDDTPerResidueStats(EntityView& model, GlobalRDMap& glob_dist_list, - lDDTSettings& settings); + bool structural_checks, + String label); void DLLEXPORT_OST_MOL_ALG PrintlDDTPerResidueStats(std::vector<lDDTLocalScore>& scores, - lDDTSettings& settings); + bool structural_checks, + int cutoffs_length); }}} #endif - - diff --git a/modules/mol/alg/src/molck.cc b/modules/mol/alg/src/molck.cc index e1c35083fca0a424e1b05a7eba5973e8d5e7ead3..ef5585d897e74bfd079066ac37ffc57ac774cf7e 100644 --- a/modules/mol/alg/src/molck.cc +++ b/modules/mol/alg/src/molck.cc @@ -2,6 +2,7 @@ #include <ost/mol/alg/nonstandard.hh> #include <ost/conop/model_check.hh> #include <ost/conop/amino_acids.hh> +#include <ost/conop/rule_based.hh> #include <ost/mol/alg/molck.hh> using namespace ost::conop; @@ -10,7 +11,8 @@ using namespace ost::mol; void ost::mol::alg::MapNonStandardResidues(EntityHandle& ent, CompoundLibPtr lib) { // TODO: Maybe it is possible to make it in-place operation - EntityHandle new_ent=CreateEntity(); + EntityHandle new_ent=CreateEntity(); + new_ent.SetName(ent.GetName()); ChainHandleList chains=ent.GetChainList(); XCSEditor new_edi=new_ent.EditXCS(); for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { @@ -38,10 +40,13 @@ void ost::mol::alg::MapNonStandardResidues(EntityHandle& ent, CompoundLibPtr lib } ResidueHandle dest_res = new_edi.AppendResidue(new_chain,OneLetterCodeToResidueName(compound->GetOneLetterCode()),r->GetNumber()); ost::mol::alg::CopyResidue(*r,dest_res,new_edi,lib); - } - } + } + } } ent = new_ent; + // Since we didn't do it in-place: reprocess the new entity + RuleBasedProcessor pr(lib); + pr.Process(ent); } void ost::mol::alg::RemoveAtoms( diff --git a/modules/mol/alg/tests/test_qsscoring.py b/modules/mol/alg/tests/test_qsscoring.py index 17735209ed23910d30abf4d565dbb246ffbeb30a..c241eea49dea794a7f620014561a482019b3507a 100644 --- a/modules/mol/alg/tests/test_qsscoring.py +++ b/modules/mol/alg/tests/test_qsscoring.py @@ -9,6 +9,7 @@ except ImportError: "Ignoring test_qsscoring.py tests." sys.exit(0) +from ost.mol.alg import lDDTSettings def _LoadFile(file_name): """Helper to avoid repeating input path over and over.""" @@ -92,17 +93,17 @@ class TestQSscore(unittest.TestCase): ent_empty = ent.CreateEmptyView() qs_ent_invalid = QSscoreEntity(ent_empty) self.assertFalse(qs_ent_invalid.is_valid) - # monomer + # monomer - should be valid ent_mono = ent.Select('cname=A') - qs_ent_invalid = QSscoreEntity(ent_mono) - self.assertFalse(qs_ent_invalid.is_valid) + qs_ent_mono = QSscoreEntity(ent_mono) + self.assertTrue(qs_ent_mono.is_valid) # short chain removed ent_short = ent.Select('cname=A or rnum<20') - qs_ent_invalid = QSscoreEntity(ent_short) - self.assertFalse(qs_ent_invalid.is_valid) - self.assertEqual(sorted(qs_ent_invalid.removed_chains), ['B', '_']) + qs_ent_mono = QSscoreEntity(ent_short) + self.assertTrue(qs_ent_mono.is_valid) + self.assertEqual(sorted(qs_ent_mono.removed_chains), ['B', '_']) # non-AA chain removal - ent_non_AA = ent_extra.Select('cname=A,C,D') + ent_non_AA = ent_extra.Select('cname=C,D') qs_ent_invalid = QSscoreEntity(ent_non_AA) self.assertFalse(qs_ent_invalid.is_valid) self.assertEqual(sorted(qs_ent_invalid.removed_chains), ['C', 'D']) @@ -376,20 +377,30 @@ class TestQSscore(unittest.TestCase): # TEST EXTRA SCORES def test_lDDT(self): - # lDDT is not symmetrical and does not account for overprediction! + # check for penalized and unpenalized oligo lDDT ref = _LoadFile('4br6.1.pdb').Select('cname=A,B') mdl = _LoadFile('4br6.1.pdb') + lddt_settings = lDDTSettings() qs_scorer = QSscorer(ref, mdl) + lddt_oligo_scorer = qs_scorer.GetOligoLDDTScorer(lddt_settings, False) self.assertAlmostEqual(qs_scorer.global_score, 0.171, 2) self.assertAlmostEqual(qs_scorer.best_score, 1.00, 2) - self.assertAlmostEqual(qs_scorer.lddt_score, 1.00, 2) - self._CheckScorerLDDT(qs_scorer) + self.assertAlmostEqual(lddt_oligo_scorer.oligo_lddt, 1.00, 2) + # with penalty we account for extra model chains + lddt_oligo_scorer_pen = qs_scorer.GetOligoLDDTScorer(lddt_settings, True) + self.assertAlmostEqual(lddt_oligo_scorer_pen.oligo_lddt, 0.5213, 2) # flip them (use QSscoreEntity to go faster) - qs_scorer2 = QSscorer(qs_scorer.qs_ent_2, qs_scorer.qs_ent_1) + qs_scorer2 = QSscorer(qs_scorer.qs_ent_2, + qs_scorer.qs_ent_1, + res_num_alignment=True) + lddt_oligo_scorer2 = qs_scorer2.GetOligoLDDTScorer(lddt_settings, False) self.assertAlmostEqual(qs_scorer2.global_score, 0.171, 2) self.assertAlmostEqual(qs_scorer2.best_score, 1.00, 2) - self.assertAlmostEqual(qs_scorer2.lddt_score, 0.483, 2) - self._CheckScorerLDDT(qs_scorer) + # without penalty we don't see extra chains + self.assertAlmostEqual(lddt_oligo_scorer2.oligo_lddt, 1.00, 2) + # with penalty we account for extra reference chains + lddt_oligo_scorer2_pen = qs_scorer2.GetOligoLDDTScorer(lddt_settings, True) + self.assertAlmostEqual(lddt_oligo_scorer2_pen.oligo_lddt, 0.4496, 2) # check properties self.assertFalse(qs_scorer.calpha_only) self.assertEqual(qs_scorer.chem_mapping, {('B', 'A'): ('B', 'C', 'D', 'A')}) @@ -663,33 +674,6 @@ class TestQSscore(unittest.TestCase): self.assertLessEqual(qs_scorer.global_score, 1.0) - def _CheckScorerLDDT(self, qs_scorer): - # check if we live up to our promises (assume: we did global and lddt score) - self._CheckScorer(qs_scorer) - # check lddt_mdl and lddt_ref - self.assertEqual(qs_scorer.lddt_mdl.chain_count, 1) - self.assertEqual(qs_scorer.lddt_ref.chain_count, 1) - # unique resnum? - resnum_mdl = [r.number.num for r in qs_scorer.lddt_mdl.residues] - resnum_mdl_set = set(resnum_mdl) - self.assertEqual(len(resnum_mdl), len(resnum_mdl_set)) - resnum_ref = [r.number.num for r in qs_scorer.lddt_ref.residues] - resnum_ref_set = set(resnum_ref) - self.assertEqual(len(resnum_ref), len(resnum_ref_set)) - # independent shared residues count from mapped_residues - num_shared = sum(len(v) for _,v in qs_scorer.mapped_residues.iteritems()) - shared_set = resnum_ref_set.intersection(resnum_mdl_set) - self.assertEqual(len(shared_set), num_shared) - # "lddt" prop on residues and B-factors? - for r in qs_scorer.lddt_mdl.residues: - if r.number.num in shared_set: - self.assertTrue(r.HasProp('lddt')) - r_lddt = r.GetFloatProp('lddt') - else: - r_lddt = 0 - self.assertTrue(all([a.b_factor == r_lddt for a in r.atoms])) - - if __name__ == "__main__": try: settings.Locate(('clustalw', 'clustalw2')) diff --git a/modules/seq/alg/pymod/renumber.py b/modules/seq/alg/pymod/renumber.py index 9f6dd02d56e9a1ff55c7c2c5e89ed6745f199af5..434732dc865f1c584c73079ecd456b17c5f8a0fb 100644 --- a/modules/seq/alg/pymod/renumber.py +++ b/modules/seq/alg/pymod/renumber.py @@ -1,6 +1,6 @@ from ost import seq, mol -def _RenumberSeq(seq_handle): +def _RenumberSeq(seq_handle, old_number_label=None): if not seq_handle.HasAttachedView(): raise RuntimeError("Sequence Handle has no attached view") ev = seq_handle.attached_view.CreateEmptyView() @@ -11,11 +11,13 @@ def _RenumberSeq(seq_handle): if r.IsValid(): ev.AddResidue(r, mol.INCLUDE_ALL) new_numbers.append(pos+1) + if old_number_label is not None: + r.SetIntProp(old_number_label, r.number.GetNum()) else: raise RuntimeError('Error: renumbering failed at position %s' % pos) return ev, new_numbers -def _RenumberAln(aln, seq_index): +def _RenumberAln(aln, seq_index, old_number_label=None): if not aln.sequences[seq_index].HasAttachedView(): raise RuntimeError("Sequence Handle has no attached view") counter=0 @@ -34,11 +36,13 @@ def _RenumberAln(aln, seq_index): % (counter)) ev.AddResidue(r, mol.INCLUDE_ALL) new_numbers.append(counter+1) + if old_number_label is not None: + r.SetIntProp(old_number_label, r.number.GetNum()) counter += 1 return ev, new_numbers -def Renumber(seq_handle, sequence_number_with_attached_view=1): +def Renumber(seq_handle, sequence_number_with_attached_view=1, old_number_label=None): """ Function to renumber an entity according to an alignment between the model sequence and the full-length target sequence. The aligned model sequence or @@ -70,9 +74,9 @@ def Renumber(seq_handle, sequence_number_with_attached_view=1): """ if isinstance(seq_handle, seq.SequenceHandle) \ or isinstance(seq_handle, seq.ConstSequenceHandle): - ev, new_numbers = _RenumberSeq(seq_handle) + ev, new_numbers = _RenumberSeq(seq_handle, old_number_label) elif isinstance(seq_handle, seq.AlignmentHandle): - ev, new_numbers = _RenumberAln(seq_handle, sequence_number_with_attached_view) + ev, new_numbers = _RenumberAln(seq_handle, sequence_number_with_attached_view, old_number_label) else: raise RuntimeError("Unknown input type " + str(type(seq_handle))) diff --git a/scripts/ost.in b/scripts/ost.in index f610b2446b4cddb8591d528cf2633879db0bd656..6026d7f62062256a2c21f40cd8ba18b98c66bc67 100755 --- a/scripts/ost.in +++ b/scripts/ost.in @@ -28,9 +28,38 @@ else SCRIPT_NAME="$0" fi BIN_DIR=`dirname "$SCRIPT_NAME"` +OST_EXEC_DIR=$(cd $BIN_DIR/../@LIBEXEC_PATH@ && pwd) +export OST_EXEC_DIR -source "$BIN_DIR/../@LIBEXEC_PATH@/ost_config" +source "$OST_EXEC_DIR/ost_config" -$pyexec $interactive -c "execfile('$DNG_ROOT/@LIBDIR@/python@PYTHON_VERSION@/site-packages/ost/ost_startup.py')" $opts -RC=$? -exit $RC +ACTION="$1" +OST_SCRIPT="${OST_EXEC_DIR}/ost-${ACTION}" + +OLDIFS=$IFS +if test -e "${OST_SCRIPT}" ; then + opts="" + for argument in "${@:2}";do + if [ -n "$opts" ]; then + opts=$opts"#""$argument" + else + opts="$argument" + fi + done + IFS="#" + $pyexec -c "execfile('$DNG_ROOT/@LIBDIR@/python@PYTHON_VERSION@/site-packages/ost/ost_startup.py')" "${OST_SCRIPT}" $opts +else + opts="" + for argument in "$@";do + if [ -n "$opts" ]; then + opts=$opts"#""$argument" + else + opts="$argument" + fi + done + IFS="#" + $pyexec $interactive -c "execfile('$DNG_ROOT/@LIBDIR@/python@PYTHON_VERSION@/site-packages/ost/ost_startup.py')" $opts + RC=$? + exit $RC +fi +IFS=$OLDIFS diff --git a/scripts/ost_config.in b/scripts/ost_config.in index afce131f195b2f17c0198ac7439402916b15f4fb..cda517095e9a64426b97e670bb3f130c44113860 100644 --- a/scripts/ost_config.in +++ b/scripts/ost_config.in @@ -76,15 +76,6 @@ fi set -o noglob -opts="" -for argument in "$@";do - if [ -n "$opts" ]; then - opts=$opts"#""$argument" - else - opts="$argument" - fi -done - # decide whether to start interactively or not # interactive mode can be forced by setting -i as a iplt option interactive="" @@ -95,6 +86,3 @@ else interactive="-i" fi fi - - -IFS="#" diff --git a/scripts/ost_startup.py.in b/scripts/ost_startup.py.in index 612b6485cf2515deffe6023f80e3ea4ea14e8ce5..d3cac43d8daa106a882f696bf8ee02db7ccb5c39 100644 --- a/scripts/ost_startup.py.in +++ b/scripts/ost_startup.py.in @@ -1,4 +1,4 @@ -import sys, os, platform +import sys, os, platform, glob import optparse def show_help(option, opt, value, parser): @@ -11,7 +11,22 @@ def interactive_flag(option, opt, value, parser): def stop(): sys.exit(0) -usage = 'usage: ost [ost options] [script to execute] [script parameters]' +usage = """ + + ost [ost options] [script to execute] [script parameters] + +or + ost [action name] [action options] + +""" + +action_path = os.path.abspath(os.environ.get("OST_EXEC_DIR", "")) + +usage += 'Following actions are available:\n' +for action in sorted(glob.glob(os.path.join(action_path, 'ost-*'))): + usage += " %s\n" % action[len(action_path)+5:] +usage += '\nEach action should respond to "--help".\n' + class OstOptionParser(optparse.OptionParser): def __init__(self, **kwargs): optparse.OptionParser.__init__(self, **kwargs)