diff --git a/.gitignore b/.gitignore index 1a16224df6a7b2a3adc436548674fa7665c4471b..f11a3d572226b6953225d0b4229363abfb251c8b 100644 --- a/.gitignore +++ b/.gitignore @@ -74,3 +74,4 @@ modules/gui/src/dngr.qrc.depends /compile_commands.json /modules/bindings/tests/formatdb.log /modules/mol/mm/src/settings.hh +*.img diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 45ebdf60c4dcd6fdac658c24ad70a10190623430..d468d0dfb1d0b75d18530c1488c50cf70b725f16 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,3 +1,22 @@ +Changes in Release 1.8.0 +-------------------------------------------------------------------------------- + + * Introduced recipes to generate Docker and Singularity images. + * Moved "nonstandard" functions from ost.conop to ost.mol.alg. Mapping + functions CopyResidue, CopyConserved and CopyNonConserved that were + previously imported from ost.conop are now to be imported from ost.mol.alg. + * Removed habit of changing secondary structure of entities when loading + from mmCIF PDB files. Before, OST would turn secondary structure 'EEH' + into 'ECH' to make it look nicer in DNG. Now, 'EEH' stays 'EEH'. + * Added Molck API to the ost.mol.alg module. + * Extended lDDT API in ost.mol.alg module to reproduce functionality of lddt + binary and fixed issues in stereo chemistry checks there. + * Added `actions` interface including one action to compare structures. + * Updated HHblits binding (minor changes for optional arguments). + * Added functionality to find optimal membrane position of protein. + * Support for recent compilers which use C++-11 by default. + * Several minor bug fixes, improvements, and speed-ups. 
+ Changes in Release 1.7.1 -------------------------------------------------------------------------------- @@ -8,7 +27,7 @@ Changes in Release 1.7.1 * Fixed unit tests * Improved documentation -Changes in Release 1.7 +Changes in Release 1.7.0 -------------------------------------------------------------------------------- * Removed Qt dependency for non-GUI compilation and fixed issues with recent @@ -24,13 +43,13 @@ Changes in Release 1.7 * Large updates for documentation * Several minor bug fixes, improvements, and speed-ups -Changes in Release 1.6 +Changes in Release 1.6.0 -------------------------------------------------------------------------------- * Added code to compare structures attached to a multiple seq. aln. * Incorporated Antechamber based force-field parameter generation for mm mod. -Changes in Release 1.5 +Changes in Release 1.5.0 -------------------------------------------------------------------------------- * Added binding to 3DComb (structural alignment) @@ -50,7 +69,7 @@ Changes in Release 1.5 * Added a wrapper to HHblits. * Removed levenberg_marquardt.h in img/alg -Changes in Release 1.4 +Changes in Release 1.4.0 -------------------------------------------------------------------------------- * Feasibility check set to off by default, atoms in compounds are now @@ -138,7 +157,7 @@ Changes in Release 1.2.1 * lDDT: Updated default angle and bond tolerance parameters from 8 stddev to 12 stddev. -Changes in Release 1.2 (since 1.1) +Changes in Release 1.2.0 (since 1.1.0) -------------------------------------------------------------------------------- * added mmCIF parser to enable loading of mmCIF files. 
The following categories @@ -169,4 +188,3 @@ Changes in Release 1.2 (since 1.1) to lDDT * new superposition dialog in DNG - diff --git a/CMakeLists.txt b/CMakeLists.txt index 2cf4c283a4cbfd59833a4d76837da48948dbe9f1..74a3348880146b72ffb3f3e045ef4502110eea98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,8 +6,8 @@ cmake_minimum_required(VERSION 2.6.4 FATAL_ERROR) project(OpenStructure CXX C) set (CMAKE_EXPORT_COMPILE_COMMANDS 1) set (OST_VERSION_MAJOR 1) -set (OST_VERSION_MINOR 7) -set (OST_VERSION_PATCH 1) +set (OST_VERSION_MINOR 8) +set (OST_VERSION_PATCH 0) set (OST_VERSION_STRING ${OST_VERSION_MAJOR}.${OST_VERSION_MINOR}.${OST_VERSION_PATCH} ) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/cmake_support) include(OST) @@ -330,6 +330,7 @@ endif() add_subdirectory(modules) add_subdirectory(scripts) add_subdirectory(tools) +add_subdirectory(actions) # deployment has to come last, to ensure that all install commands are run before deployment # magic is done add_subdirectory(deployment) diff --git a/actions/CMakeLists.txt b/actions/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..ce6e23204e1a1b3b08332c428e78d29384c48808 --- /dev/null +++ b/actions/CMakeLists.txt @@ -0,0 +1,4 @@ +add_custom_target(actions ALL) + +ost_action_init() +ost_action(ost-compare-structures actions) diff --git a/actions/ost-compare-structures b/actions/ost-compare-structures new file mode 100644 index 0000000000000000000000000000000000000000..ff579be64c219ea4029389fb6be4c23eda099d13 --- /dev/null +++ b/actions/ost-compare-structures @@ -0,0 +1,945 @@ +"""Evaluate model structure against reference. + +eg. + + ost compare-structures \\ + --model <MODEL> \\ + --reference <REF> \\ + --output output.json \\ + --lddt \\ + --structural-checks \\ + --consistency-checks \\ + --molck \\ + --remove oxt hyd \\ + --map-nonstandard-residues + +Here we describe how the parameters can be set to mimic a CAMEO evaluation +(as of August 2018). 
+ +CAMEO calls the lddt binary as follows: + + lddt \\ + -p <PARAMETER FILE> \\ + -f \\ + -a 15 \\ + -b 15 \\ + -r 15 \\ + <MODEL> \\ + <REF> + +Only model structures are "Molck-ed" in CAMEO. The call to molck is as follows: + + molck \\ + --complib=<COMPOUND LIB> \\ + --rm=hyd,oxt,unk \\ + --fix-ele \\ + --map-nonstd <FILEPATH> \\ + --out=<OUTPUT> + +To be as compatible as possible with CAMEO one should call +compare-structures as follows: + + ost compare-structures \\ + # General parameters + #################### + --model <MODEL> \\ + --reference <REF> \\ + --output output.json \\ + # QS-score parameters + ##################### + --qs-score \\ + --residue-number-alignment \\ + # lDDT parameters + ################# + --lddt \\ + --inclusion-radius 15.0 \\ + # Molecular check parameters + ############################ + --molck \\ + --remove oxt hyd unk \\ + --clean-element-column \\ + --map-nonstandard-residues \\ + # Additional checks + ################### + --structural-checks \\ + --bond-tolerance 15.0 \\ + --angle-tolerance 15.0 \\ + --consistency-checks +""" + +import os +import sys +import json +import argparse + +import ost +from ost.io import (LoadPDB, LoadMMCIF, SavePDB, MMCifInfoBioUnit, MMCifInfo, + MMCifInfoTransOp, ReadStereoChemicalPropsFile, profiles) +from ost import PushVerbosityLevel +from ost.mol.alg import (qsscoring, Molck, MolckSettings, lDDTSettings, + CheckStructure, ResidueNamesMatch) +from ost.conop import (CompoundLib, SetDefaultLib, GetDefaultLib, + RuleBasedProcessor) +from ost.seq.alg.renumber import Renumber + + +def _GetDefaultShareFilePath(filename): + """Look for filename in working directory and OST shared data path. + :return: Path to valid file or None if not found. 
+ """ + # Try current directory + cwd = os.path.abspath(os.getcwd()) + file_path = os.path.join(cwd, filename) + if not os.path.isfile(file_path): + try: + file_path = os.path.join(ost.GetSharedDataPath(), filename) + except RuntimeError: + # Ignore errors here (caught later together with non-existing file) + pass + if not os.path.isfile(file_path): + file_path = None + # Either file_path is valid file path or None + return file_path + +def _GetDefaultParameterFilePath(): + # Try to get in default locations + parameter_file_path = _GetDefaultShareFilePath("stereo_chemical_props.txt") + if parameter_file_path is None: + msg = ( + "Could not set default stereochemical parameter file. In " + "order to use the default one please set $OST_ROOT " + "environmental variable, run the script with OST binary or" + " provide a local copy of 'stereo_chemical_props.txt' in " + "CWD. Alternatively provide the path to the local copy.") + else: + msg = "" + return parameter_file_path, msg + +def _GetDefaultCompoundLibraryPath(): + # Try to get in default locations + compound_library_path = _GetDefaultShareFilePath("compounds.chemlib") + if compound_library_path is None: + msg = ( + "Could not set default compounds library path. In " + "order to use the default one please set $OST_ROOT " + "environmental variable, run the script with OST binary or" + " provide a local copy of 'compounds.chemlib' in CWD" + ". 
Alternatively provide the path to the local copy.") + else: + msg = "" + return compound_library_path, msg + +def _ParseArgs(): + """Parse command-line arguments.""" + # + # General options + # + parser = argparse.ArgumentParser( + formatter_class=argparse.RawTextHelpFormatter, + description=__doc__, + prog="ost compare-structures") + + parser.add_argument( + '-v', + '--verbosity', + type=int, + default=3, + help="Set verbosity level.") + parser.add_argument( + "-m", + "--model", + dest="model", + required=True, + help=("Path to the model file.")) + parser.add_argument( + "-r", + "--reference", + dest="reference", + required=True, + help=("Path to the reference file.")) + parser.add_argument( + "-o", + "--output", + dest="output", + help=("Output file name. The output will be saved as a JSON file.")) + parser.add_argument( + "-d", + "--dump-structures", + dest="dump_structures", + default=False, + action="store_true", + help=("Dump cleaned structures used to calculate all the scores as\n" + "PDB files using specified suffix. Files will be dumped to the\n" + "same location as original files.")) + parser.add_argument( + "-ds", + "--dump-suffix", + dest="dump_suffix", + default=".compare.structures.pdb", + help=("Use this suffix to dump structures.\n" + "Defaults to .compare.structures.pdb.")) + parser.add_argument( + "-rs", + "--reference-selection", + dest="reference_selection", + default="", + help=("Selection performed on reference structures.")) + parser.add_argument( + "-ms", + "--model-selection", + dest="model_selection", + default="", + help=("Selection performed on model structures.")) + parser.add_argument( + "-ca", + "--c-alpha-only", + dest="c_alpha_only", + default=False, + action="store_true", + help=("Use C-alpha atoms only. 
Equivalent of calling the action with\n" + "'--model-selection=\"aname=CA\" " + "--reference-selection=\"aname=CA\"'\noptions.")) + parser.add_argument( + "-ft", + "--fault-tolerant", + dest="fault_tolerant", + default=False, + action="store_true", + help=("Fault tolerant parsing.")) + parser.add_argument( + "-cl", + "--compound-library", + dest="compound_library", + default=None, + help=("Location of the compound library file (compounds.chemlib).\n" + "If not provided, the following locations are searched in this\n" + "order: 1. Working directory, 2. OpenStructure standard library" + "\nlocation.")) + # + # QS-scorer options + # + parser.add_argument( + "-qs", + "--qs-score", + dest="qs_score", + default=False, + action="store_true", + help=("Calculate QS-score.")) + parser.add_argument( + "-c", + "--chain-mapping", + nargs="+", + type=lambda x: x.split(":"), + dest="chain_mapping", + help=("Mapping of chains between the reference and the model.\n" + "Each separate mapping consist of key:value pairs where key\n" + "is the chain name in reference and value is the chain name in\n" + "model.")) + parser.add_argument( + "--qs-rmsd", + dest="qs_rmsd", + default=False, + action="store_true", + help=("Calculate CA RMSD between shared CA atoms of mapped chains.\n" + "This uses a superposition using all mapped chains which\n" + "minimizes the CA RMSD.")) + parser.add_argument( + "-rna", + "--residue-number-alignment", + dest="residue_number_alignment", + default=False, + action="store_true", + help=("Make alignment based on residue number instead of using\n" + "a global BLOSUM62-based alignment.")) + # + # lDDT options + # + parser.add_argument( + "-l", + "--lddt", + dest="lddt", + default=False, + action="store_true", + help=("Calculate lDDT.")) + parser.add_argument( + "-ir", + "--inclusion-radius", + dest="inclusion_radius", + type=float, + default=15.0, + help=("Distance inclusion radius.")) + parser.add_argument( + "-ss", + "--sequence-separation", + 
dest="sequence_separation", + type=int, + default=0, + help=("Sequence separation. Only distances between residues whose\n" + "separation is higher than the provided parameter are\n" + "considered when computing the score")) + parser.add_argument( + "-spr", + "--save-per-residue-scores", + dest="save_per_residue_scores", + default=False, + action="store_true", + help=("Log per-residue lDDT scores and store them in the output.")) + # + # Molecular check parameters + # + parser.add_argument( + "-ml", + "--molck", + dest="molck", + default=False, + action="store_true", + help=("Run molecular checker to clean up input.")) + parser.add_argument( + "-rm", + "--remove", + dest="remove", + nargs="+", # *, +, ?, N + required=False, + default=["hyd"], + help=("Remove atoms and residues matching some criteria:\n" + " * zeroocc - Remove atoms with zero occupancy\n" + " * hyd - remove hydrogen atoms\n" + " * oxt - remove terminal oxygens\n" + " * nonstd - remove all residues not one of the 20\n" + " * standard amino acids\n" + " * unk - Remove unknown and atoms not following the " + "nomenclature")) + parser.add_argument( + "-ce", + "--clean-element-column", + dest="clean_element_column", + default=False, + action="store_true", + help=("Clean up element column")) + parser.add_argument( + "-mn", + "--map-nonstandard-residues", + dest="map_nonstandard_residues", + default=False, + action="store_true", + help=("Map modified residues back to the parent amino acid, for\n" + "example MSE -> MET, SEP -> SER.")) + # + # Options for various checks + # + parser.add_argument( + "-sc", + "--structural-checks", + dest="structural_checks", + default=False, + action="store_true", + help=("Perform structural checks and filter input data.")) + parser.add_argument( + "-p", + "--parameter-file", + dest="parameter_file", + default=None, + help=("Location of the stereochemical parameter file\n" + "(stereo_chemical_props.txt).\n" + "If not provided, the following locations are searched in this\n" + "order: 1. Working directory, 2. 
OpenStructure standard library" + "\nlocation.")) + parser.add_argument( + "-bt", + "--bond-tolerance", + dest="bond_tolerance", + type=float, + default=12.0, + help=("Tolerance in STD for bonds.")) + parser.add_argument( + "-at", + "--angle-tolerance", + dest="angle_tolerance", + type=float, + default=12.0, + help=("Tolerance in STD for angles.")) + parser.add_argument( + "-cc", + "--consistency-checks", + dest="consistency_checks", + default=False, + action="store_true", + help=("Take consistency checks into account. By default residue name\n" + "consistency between a model-reference pair would be checked\n" + "but only a warning message will be displayed and the script\n" + "will continue to calculate scores. If this flag is ON, checks\n" + "will not be ignored and if the pair does not pass the test\n" + "all the scores for that pair will be marked as a FAILURE.")) + + # Print full help if no arguments provided + if len(sys.argv) == 1: + parser.print_help(sys.stderr) + sys.exit(1) + + opts = parser.parse_args() + # Set chain mapping + if opts.chain_mapping is not None: + try: + opts.chain_mapping = dict(opts.chain_mapping) + except ValueError: + parser.error( + "Cannot parse chain mapping into dictionary. The " + "correct format is: key:value [key2:value2 ...].") + + # Check parameter file if structural checks are on + if opts.structural_checks: + if opts.parameter_file is None: + # try to get default if none provided + opts.parameter_file, msg = _GetDefaultParameterFilePath() + if msg: + parser.error(msg) + else: + # if provided it must exist + if not os.path.isfile(opts.parameter_file): + parser.error("Parameter file %s does not exist." \ + % opts.parameter_file) + + # Check compound library path (always required!) 
+ if opts.compound_library is None: + # try to get default if none provided + opts.compound_library, msg = _GetDefaultCompoundLibraryPath() + if msg: + parser.error(msg) + else: + # if provided it must exist + if not os.path.isfile(opts.compound_library): + parser.error("Compounds library file %s does not exist." \ + % opts.compound_library) + + # Check model and reference paths + if not os.path.isfile(opts.model): + parser.error("Model file %s does not exist." % opts.model) + if not os.path.isfile(opts.reference): + parser.error("Reference file %s does not exist." % opts.reference) + + return opts + + +def _SetCompoundsChemlib(path_to_chemlib): + """Set default compound library for OST.""" + # NOTE: This is adapted from ProMod3 code and should in the future be doable + # with some shared OST code! + compound_lib = CompoundLib.Load(path_to_chemlib) + SetDefaultLib(compound_lib) + processor = RuleBasedProcessor(compound_lib) + for profile_name in profiles: + profiles[profile_name].processor = processor.Copy() + + +def _RevertChainNames(ent): + """Revert chain names to original names. + + By default the first chain with given name will not have any number + attached to it ie. if there are two chains mapping to chain A the resulting + chain names will be: A and A2. 
+ """ + editor = ent.EditXCS() + suffix = "_tmp" # just a suffix for temporary chain name + separator = "" # dot causes selection error + used_names = dict() + reverted_chains = dict() + for chain in ent.chains: + try: + original_name = chain.GetStringProp("original_name") + except Exception as ex: + ost.LogError("Cannot revert chain %s back to original: %s" % ( + chain.name, + str(ex))) + reverted_chains[chain.name] = chain.name + editor.RenameChain(chain, chain.name + suffix) + continue + new_name = original_name + if new_name not in used_names: + used_names[original_name] = 2 + reverted_chains[chain.name] = new_name + editor.RenameChain(chain, chain.name + suffix) + else: + new_name = "%s%s%i" % (original_name, + separator, + used_names[original_name]) + reverted_chains[chain.name] = new_name + editor.RenameChain(chain, chain.name + suffix) + used_names[original_name] += 1 + for chain in ent.chains: + editor.RenameChain(chain, reverted_chains[chain.name[:-len(suffix)]]) + rev_out = ["%s -> %s" % (on, nn) for on, nn in reverted_chains.iteritems()] + ost.LogInfo("Reverted chains: %s" % ", ".join(rev_out)) + + +def _CheckConsistency(alignments, log_error): + is_cons = True + for alignment in alignments: + ref_chain = Renumber(alignment.GetSequence(0)).CreateFullView() + mdl_chain = Renumber(alignment.GetSequence(1)).CreateFullView() + new_is_cons = ResidueNamesMatch(ref_chain, mdl_chain, log_error) + is_cons = is_cons and new_is_cons + return is_cons + + +def _GetAlignmentsAsFasta(alignments): + """Get the alignments as FASTA formated string. 
+ + :param alignments: Alignments + :type alignments: list of AlignmentHandle + :returns: list of alignments in FASTA format + :rtype: list of strings + """ + strings = list() + for alignment in alignments: + aln_str = ">reference:%s\n%s\n>model:%s\n%s" % ( + alignment.GetSequence(0).name, + alignment.GetSequence(0).GetString(), + alignment.GetSequence(1).name, + alignment.GetSequence(1).GetString()) + strings.append(aln_str) + return strings + + +def _ReadStructureFile(path, c_alpha_only=False, fault_tolerant=False, + selection=""): + """Safely read structure file into OST entities. + + The function can read both PDB and mmCIF files. + + :param path: Path to the file. + :type path: :class:`str` + :returns: List of entities read from the file + :rtype: :class:`list` + """ + + def _Select(entity): + selection_message = "Selecting %s" % selection + if selection: + ost.LogInfo(selection_message) + entity = entity.Select(selection) + return entity + + entities = list() + if not os.path.isfile(path): + raise IOError("%s is not a file" % path) + try: + entity = LoadPDB( + path, + fault_tolerant=fault_tolerant, + calpha_only=c_alpha_only) + if not entity.IsValid(): + raise IOError("Provided file does not contain valid entity.") + entity.SetName(os.path.basename(path)) + entity = _Select(entity) + entities.append(entity) + except Exception: + try: + tmp_entity, cif_info = LoadMMCIF( + path, + info=True, + fault_tolerant=fault_tolerant, + calpha_only=c_alpha_only) + if len(cif_info.biounits) == 0: + tbu = MMCifInfoBioUnit() + tbu.id = 'ASU of ' + tmp_entity.pdb_id + tbu.details = 'asymmetric unit' + for chain in tmp_entity.chains: + tbu.AddChain(str(chain)) + tinfo = MMCifInfo() + tops = MMCifInfoTransOp() + tinfo.AddOperation(tops) + tbu.AddOperations(tinfo.GetOperations()) + entity = tbu.PDBize(tmp_entity, min_polymer_size=0) + entity.SetName(os.path.basename(path) + ".au") + _RevertChainNames(entity) + entity = _Select(entity) + entities.append(entity) + elif len(cif_info.biounits) > 1: + 
for i, biounit in enumerate(cif_info.biounits, 1): + entity = biounit.PDBize(tmp_entity, min_polymer_size=0) + if not entity.IsValid(): + raise IOError( + "Provided file does not contain valid entity.") + entity.SetName(os.path.basename(path) + "." + str(i)) + _RevertChainNames(entity) + entity = _Select(entity) + entities.append(entity) + else: + biounit = cif_info.biounits[0] + entity = biounit.PDBize(tmp_entity, min_polymer_size=0) + if not entity.IsValid(): + raise IOError( + "Provided file does not contain valid entity.") + entity.SetName(os.path.basename(path)) + _RevertChainNames(entity) + entity = _Select(entity) + entities.append(entity) + + except Exception as exc: + raise exc + return entities + + +def _MolckEntity(entity, options): + """Molck the entity.""" + lib = GetDefaultLib() + to_remove = tuple(options.remove) + + ms = MolckSettings(rm_unk_atoms="unk" in to_remove, + rm_non_std="nonstd" in to_remove, + rm_hyd_atoms="hyd" in to_remove, + rm_oxt_atoms="oxt" in to_remove, + rm_zero_occ_atoms="zeroocc" in to_remove, + colored=False, + map_nonstd_res=options.map_nonstandard_residues, + assign_elem=options.clean_element_column) + Molck(entity, lib, ms) + + +def _Main(): + """Do the magic.""" + # + # Setup + opts = _ParseArgs() + PushVerbosityLevel(opts.verbosity) + _SetCompoundsChemlib(opts.compound_library) + # + # Read the input files + ost.LogInfo("#" * 80) + ost.LogInfo("Reading input files (fault_tolerant=%s)" % + str(opts.fault_tolerant)) + ost.LogInfo(" --> reading model from %s" % opts.model) + models = _ReadStructureFile( + opts.model, + c_alpha_only=opts.c_alpha_only, + fault_tolerant=opts.fault_tolerant, + selection=opts.model_selection) + ost.LogInfo(" --> reading reference from %s" % opts.reference) + references = _ReadStructureFile( + opts.reference, + c_alpha_only=opts.c_alpha_only, + fault_tolerant=opts.fault_tolerant, + selection=opts.reference_selection) + # molcking + if opts.molck: + ost.LogInfo("#" * 80) + ost.LogInfo("Cleaning up 
input with Molck") + for reference in references: + _MolckEntity(reference, opts) + for model in models: + _MolckEntity(model, opts) + # restrict to peptides (needed for CheckStructure anyways) + for i in range(len(references)): + references[i] = references[i].Select("peptide=true") + for i in range(len(models)): + models[i] = models[i].Select("peptide=true") + # structure checking + if opts.structural_checks: + ost.LogInfo("#" * 80) + ost.LogInfo("Performing structural checks") + stereochemical_parameters = ReadStereoChemicalPropsFile( + opts.parameter_file) + ost.LogInfo(" --> for reference(s)") + for reference in references: + ost.LogInfo("Checking %s" % reference.GetName()) + CheckStructure(reference, + stereochemical_parameters.bond_table, + stereochemical_parameters.angle_table, + stereochemical_parameters.nonbonded_table, + opts.bond_tolerance, + opts.angle_tolerance) + ost.LogInfo(" --> for model(s)") + for model in models: + ost.LogInfo("Checking %s" % model.GetName()) + CheckStructure(model, + stereochemical_parameters.bond_table, + stereochemical_parameters.angle_table, + stereochemical_parameters.nonbonded_table, + opts.bond_tolerance, + opts.angle_tolerance) + if len(models) > 1 or len(references) > 1: + ost.LogInfo("#" * 80) + ost.LogInfo( + "Multiple complexes mode ON. 
All combinations will be tried.") + + result = { + "result": {}, + "options": vars(opts)} + result["options"]["cwd"] = os.path.abspath(os.getcwd()) + # + # Perform scoring + skipped = list() + for model in models: + model_name = model.GetName() + model_results = dict() + for reference in references: + reference_name = reference.GetName() + reference_results = { + "info": dict()} + ost.LogInfo("#" * 80) + ost.LogInfo("Comparing %s to %s" % ( + model_name, + reference_name)) + qs_scorer = qsscoring.QSscorer(reference, + model, + opts.residue_number_alignment) + if opts.chain_mapping is not None: + ost.LogInfo( + "Using custom chain mapping: %s" % str( + opts.chain_mapping)) + qs_scorer.chain_mapping = opts.chain_mapping + else: + qs_scorer.chain_mapping # just to initialize it + ost.LogInfo("-" * 80) + ost.LogInfo("Checking consistency between %s and %s" % ( + model_name, reference_name)) + is_cons = _CheckConsistency( + qs_scorer.alignments, + opts.consistency_checks) + reference_results["info"]["residue_names_consistent"] = is_cons + reference_results["info"]["mapping"] = { + "chain_mapping": qs_scorer.chain_mapping, + "chain_mapping_scheme": qs_scorer.chain_mapping_scheme, + "alignments": _GetAlignmentsAsFasta(qs_scorer.alignments)} + skip_score = False + if opts.consistency_checks: + if not is_cons: + msg = (("Residue names in model %s and in reference " + "%s are inconsistent.") % ( + model_name, + reference_name)) + ost.LogError(msg) + skip_score = True + skipped.append(skip_score) + else: + ost.LogInfo("Consistency check: OK") + skipped.append(False) + else: + skipped.append(False) + if not is_cons: + msg = (("Residue names in model %s and in reference " + "%s are inconsistent.\nThis might lead to " + "corrupted results.") % ( + model_name, + reference_name)) + ost.LogWarning(msg) + else: + ost.LogInfo("Consistency check: OK") + if opts.qs_rmsd: + ost.LogInfo("-" * 80) + if skip_score: + ost.LogInfo( + "Skipping QS-RMSD because consistency check failed") + 
reference_results["qs_rmsd"] = { + "status": "FAILURE", + "error": "Consistency check failed."} + else: + ost.LogInfo("Computing QS-RMSD") + try: + reference_results["qs_rmsd"] = { + "status": "SUCCESS", + "error": "", + "ca_rmsd": qs_scorer.superposition.rmsd} + except qsscoring.QSscoreError as ex: + ost.LogError('QS-RMSD failed:', str(ex)) + reference_results["qs_rmsd"] = { + "status": "FAILURE", + "error": str(ex)} + if opts.qs_score: + ost.LogInfo("-" * 80) + if skip_score: + ost.LogInfo( + "Skipping QS-score because consistency check failed") + reference_results["qs_score"] = { + "status": "FAILURE", + "error": "Consistency check failed.", + "global_score": 0.0, + "best_score": 0.0} + else: + ost.LogInfo("Computing QS-score") + try: + reference_results["qs_score"] = { + "status": "SUCCESS", + "error": "", + "global_score": qs_scorer.global_score, + "best_score": qs_scorer.best_score} + except qsscoring.QSscoreError as ex: + # default handling: report failure and set score to 0 + ost.LogError('QSscore failed:', str(ex)) + reference_results["qs_score"] = { + "status": "FAILURE", + "error": str(ex), + "global_score": 0.0, + "best_score": 0.0} + # Calculate lDDT + if opts.lddt: + ost.LogInfo("-" * 80) + ost.LogInfo("Computing lDDT scores") + lddt_results = { + "single_chain_lddt": list() + } + lddt_settings = lDDTSettings( + radius=opts.inclusion_radius, + sequence_separation=opts.sequence_separation, + label="lddt") + ost.LogInfo("lDDT settings: ") + ost.LogInfo(str(lddt_settings).rstrip()) + ost.LogInfo("===") + oligo_lddt_scorer = qs_scorer.GetOligoLDDTScorer(lddt_settings) + for mapped_lddt_scorer in oligo_lddt_scorer.mapped_lddt_scorers: + # Get data + lddt_scorer = mapped_lddt_scorer.lddt_scorer + model_chain = mapped_lddt_scorer.model_chain_name + reference_chain = mapped_lddt_scorer.reference_chain_name + if skip_score: + ost.LogInfo( + " --> Skipping single chain lDDT because " + "consistency check failed") + lddt_results["single_chain_lddt"].append({ + 
"status": "FAILURE", + "error": "Consistency check failed.", + "model_chain": model_chain, + "reference_chain": reference_chain, + "global_score": 0.0, + "conserved_contacts": 0.0, + "total_contacts": 0.0}) + else: + try: + ost.LogInfo((" --> Computing lDDT between model " + "chain %s and reference chain %s") % ( + model_chain, + reference_chain)) + ost.LogInfo("Global LDDT score: %.4f" % + lddt_scorer.global_score) + ost.LogInfo( + "(%i conserved distances out of %i checked, over " + "%i thresholds)" % (lddt_scorer.conserved_contacts, + lddt_scorer.total_contacts, + len(lddt_settings.cutoffs))) + sc_lddt_scores = { + "status": "SUCCESS", + "error": "", + "model_chain": model_chain, + "reference_chain": reference_chain, + "global_score": lddt_scorer.global_score, + "conserved_contacts": + lddt_scorer.conserved_contacts, + "total_contacts": lddt_scorer.total_contacts} + if opts.save_per_residue_scores: + per_residue_sc = \ + mapped_lddt_scorer.GetPerResidueScores() + ost.LogInfo("Per residue local lDDT (reference):") + ost.LogInfo("Chain\tResidue Number\tResidue Name" + "\tlDDT\tConserved Contacts\tTotal " + "Contacts") + for prs_scores in per_residue_sc: + ost.LogInfo("%s\t%i\t%s\t%.4f\t%i\t%i" % ( + reference_chain, + prs_scores["residue_number"], + prs_scores["residue_name"], + prs_scores["lddt"], + prs_scores["conserved_contacts"], + prs_scores["total_contacts"])) + sc_lddt_scores["per_residue_scores"] = \ + per_residue_sc + lddt_results["single_chain_lddt"].append( + sc_lddt_scores) + except Exception as ex: + ost.LogError('Single chain lDDT failed:', str(ex)) + lddt_results["single_chain_lddt"].append({ + "status": "FAILURE", + "error": str(ex), + "model_chain": model_chain, + "reference_chain": reference_chain, + "global_score": 0.0, + "conserved_contacts": 0.0, + "total_contacts": 0.0}) + # perform oligo lddt scoring + if skip_score: + ost.LogInfo( + " --> Skipping oligomeric lDDT because consistency " + "check failed") + lddt_results["oligo_lddt"] = { + 
"status": "FAILURE", + "error": "Consistency check failed.", + "global_score": 0.0} + else: + try: + ost.LogInfo(' --> Computing oligomeric lDDT score') + lddt_results["oligo_lddt"] = { + "status": "SUCCESS", + "error": "", + "global_score": oligo_lddt_scorer.oligo_lddt} + ost.LogInfo( + "Oligo lDDT score: %.4f" % + oligo_lddt_scorer.oligo_lddt) + except Exception as ex: + ost.LogError('Oligo lDDT failed:', str(ex)) + lddt_results["oligo_lddt"] = { + "status": "FAILURE", + "error": str(ex), + "global_score": 0.0} + if skip_score: + ost.LogInfo( + " --> Skipping weighted lDDT because consistency " + "check failed") + lddt_results["weighted_lddt"] = { + "status": "FAILURE", + "error": "Consistency check failed.", + "global_score": 0.0} + else: + try: + ost.LogInfo(' --> Computing weighted lDDT score') + lddt_results["weighted_lddt"] = { + "status": "SUCCESS", + "error": "", + "global_score": oligo_lddt_scorer.weighted_lddt} + ost.LogInfo( + "Weighted lDDT score: %.4f" % + oligo_lddt_scorer.weighted_lddt) + except Exception as ex: + ost.LogError('Weighted lDDT failed:', str(ex)) + lddt_results["weighted_lddt"] = { + "status": "FAILURE", + "error": str(ex), + "global_score": 0.0} + reference_results["lddt"] = lddt_results + model_results[reference_name] = reference_results + if opts.dump_structures: + ost.LogInfo("-" * 80) + ref_output_path = os.path.join( + os.path.dirname(opts.reference), + reference_name + opts.dump_suffix) + ost.LogInfo("Saving cleaned up reference to %s" % + ref_output_path) + try: + SavePDB(qs_scorer.qs_ent_1.ent, + ref_output_path) + except Exception as ex: + ost.LogError("Cannot save reference: %s" % str(ex)) + mdl_output_path = os.path.join( + os.path.dirname(opts.model), + model_name + opts.dump_suffix) + ost.LogInfo("Saving cleaned up model to %s" % + mdl_output_path) + try: + SavePDB(qs_scorer.qs_ent_2.ent, + mdl_output_path) + except Exception as ex: + ost.LogError("Cannot save model: %s" % str(ex)) + result["result"][model_name] = 
model_results + + if all(skipped) and len(skipped) > 0: + ost.LogError("Consistency check failed for all model-reference pairs.") + if opts.output is not None: + ost.LogInfo("#" * 80) + ost.LogInfo("Saving output into %s" % opts.output) + with open(opts.output, "w") as outfile: + outfile.write(json.dumps(result, indent=4)) + + +if __name__ == '__main__': + # make script 'hot' + unbuffered = os.fdopen(sys.stdout.fileno(), 'w', 0) + sys.stdout = unbuffered + _Main() diff --git a/cmake_support/OST.cmake b/cmake_support/OST.cmake index 8e4e243432396a870b007da628e04713ac021c6f..4364b0c9407b4f82c3cd1cbb41f69cb09c217a73 100644 --- a/cmake_support/OST.cmake +++ b/cmake_support/OST.cmake @@ -929,3 +929,49 @@ macro(setup_boost) set(BOOST_THREAD ${Boost_LIBRARIES}) set(Boost_LIBRARIES) endmacro() + + +#------------------------------------------------------------------------------- +# Synopsis: +# ost_action_init() +# +# Description: +# Initialise cached variables +#------------------------------------------------------------------------------- +macro(ost_action_init) + set(OST_ACTION_NAMES "" CACHE INTERNAL "" FORCE) +endmacro(ost_action_init) + +#------------------------------------------------------------------------------- +# Synopsis: +# ost_action(ACTION TARGET) +# +# Description: +# Add a script to actions. 
+# ACTION script to be added (needs to have permissions to be executed) +# TARGET make target to add the action to +#------------------------------------------------------------------------------- +macro(ost_action ACTION TARGET) + copy_if_different("${CMAKE_CURRENT_SOURCE_DIR}" + "${STAGE_DIR}/${LIBEXEC_PATH}" + "${ACTION}" "TARGETS" ${TARGET}) + install(FILES "${ACTION}" DESTINATION "${LIBEXEC_PATH}" + PERMISSIONS WORLD_EXECUTE GROUP_EXECUTE OWNER_EXECUTE + WORLD_READ GROUP_READ OWNER_READ) + # storing tool names for bash completion + string(REGEX REPLACE "^ost-" "" stripped_action ${ACTION}) + if(DEFINED OST_ACTION_NAMES) + if(${OST_ACTION_NAMES} MATCHES "${stripped_action}") + set(_ACTION_NAMES "${OST_ACTION_NAMES}") + else() + if("${OST_ACTION_NAMES}" STREQUAL "") + set(_ACTION_NAMES "${stripped_action}") + else() + set(_ACTION_NAMES "${OST_ACTION_NAMES} ${stripped_action}") + endif() + endif() + else() + set(_ACTION_NAMES "${stripped_action}") + endif() + set(OST_ACTION_NAMES "${_ACTION_NAMES}" CACHE INTERNAL "" FORCE) +endmacro(ost_action) \ No newline at end of file diff --git a/doc/conf/conf.py b/doc/conf/conf.py index 9ddc034d552ed0d85a7f74aa500d22ed415ecd68..78949cd7a85e80047bb397debf755fd8385827ee 100644 --- a/doc/conf/conf.py +++ b/doc/conf/conf.py @@ -42,7 +42,7 @@ master_doc = 'index' # General information about the project. 
project = u'OpenStructure' -copyright = u'2011, OpenStructure authors' +copyright = u'2018, OpenStructure authors' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..996b7fc8b87adb21b37c209ef48d267706d469c0 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,176 @@ +FROM ubuntu:16.04 + +# ARGUMENTS +########### + +# OBLIGATORY +ARG OPENSTRUCTURE_VERSION +RUN if [ -z "${OPENSTRUCTURE_VERSION}" ]; then \ + echo "OPENSTRUCTURE_VERSION argument is obligatory. Run with --build-arg OPENSTRUCTURE_VERSION=<VERSION>"; \ + exit 1; \ + else : ; fi + +# OPTIONAL +ARG SRC_FOLDER="/usr/local/src" +ARG CPUS_FOR_MAKE=8 +ARG PYTHONPATH="/usr/local/lib64/python2.7/site-packages:${PYTHONPATH}" +ARG OPENSTRUCTURE_SHARE="/usr/local/share/ost" +ARG MSMS_VERSION="2.6.1" +ARG OPENMM_VERSION="7.1.1" +ARG DSSP_VERSION="2.2.1" +ARG OPENMM_INCLUDE_PATH="/usr/local/openmm/include/" +ARG OPENMM_LIB_PATH="/usr/local/openmm/lib/" + +# INSTALL SYSTEM DEPS +##################### +RUN apt-get update -y && apt-get install -y cmake \ + sip-dev \ + libtiff-dev \ + libfftw3-dev \ + libeigen3-dev \ + libboost-all-dev \ + libpng-dev \ + python-all \ + python2.7 \ + python-qt4 \ + qt4-qtconfig \ + qt4-qmake \ + libqt4-dev \ + libpng-dev \ + wget \ + git \ + gfortran \ + python-pip \ + tar \ + libbz2-dev \ + doxygen \ + swig \ + clustalw \ + python-virtualenv \ + locales && \ + # CLEANUP + rm -rf /var/lib/apt/lists/* +# INSTALL SOME PYTHON PACKAGES GLOBALY +###################################### +RUN pip install -U pip==9.0.3 && \ + pip install -U setuptools==39.0.1 && \ + pip install --no-cache-dir numpy==1.10.4 \ + scipy==1.0.0 \ + pandas==0.22.0 + +# SET LOCALE +############ +# RUN echo "LC_ALL=en_US.UTF-8" >> /etc/environment +# RUN echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen +# RUN 
echo "LANG=en_US.UTF-8" > /etc/locale.conf +# RUN locale-gen en_US.UTF-8 + +# DOWNLOAD AND INSTALL MSMS +########################### +RUN cd ${SRC_FOLDER} && \ + mkdir -p msms && \ + cd ${SRC_FOLDER}/msms && \ + wget http://mgltools.scripps.edu/downloads/tars/releases/MSMSRELEASE/REL${MSMS_VERSION}/msms_i86_64Linux2_${MSMS_VERSION}.tar.gz && \ + tar -xvzf msms_i86_64Linux2_${MSMS_VERSION}.tar.gz && \ + cp -v ${SRC_FOLDER}/msms/msms.x86_64Linux2.${MSMS_VERSION} /usr/local/bin/msms && \ + cp -v ${SRC_FOLDER}/msms/pdb_to_xyzr /usr/local/bin/pdb_to_xyzr && \ + cp -v ${SRC_FOLDER}/msms/pdb_to_xyzrn /usr/local/bin/pdb_to_xyzrn && \ + rm -rf ${SRC_FOLDER}/msms + +# COMPILE OPENMM FROM SOURCES. INSTALL TO /usr/local +#################################################### +RUN cd ${SRC_FOLDER} && \ + wget -O openmm-${OPENMM_VERSION}.tar.gz -nc https://github.com/pandegroup/openmm/archive/${OPENMM_VERSION}.tar.gz && \ + mkdir ${SRC_FOLDER}/openmm-${OPENMM_VERSION} && \ + tar xf openmm-${OPENMM_VERSION}.tar.gz -C ${SRC_FOLDER}/openmm-${OPENMM_VERSION} --strip-components=1 && \ + mkdir -p ${SRC_FOLDER}/openmm-${OPENMM_VERSION}/build && \ + cd ${SRC_FOLDER}/openmm-${OPENMM_VERSION}/build && \ + cmake .. 
&& make -j $CPUS_FOR_MAKE && make install && \ + cd ${SRC_FOLDER}/openmm-${OPENMM_VERSION}/build/python && \ + python setup.py build && python setup.py install && \ + rm -rf ${SRC_FOLDER}/openmm-${OPENMM_VERSION}.tar.gz && \ + rm -rf ${SRC_FOLDER}/openmm-${OPENMM_VERSION} + + +# COMPILE AND INSTALL DSSP +########################## +RUN cd ${SRC_FOLDER} && \ + wget ftp://ftp.cmbi.umcn.nl/pub/molbio/software/dssp-2/dssp-${DSSP_VERSION}.tgz && \ + tar -xvzf dssp-${DSSP_VERSION}.tgz && \ + cd dssp-${DSSP_VERSION} && \ + make -j ${CPUS_FOR_MAKE} && \ + make install && \ + rm -rf ${SRC_FOLDER}/dssp-${DSSP_VERSION}.tgz && \ + rm -rf ${SRC_FOLDER}/dssp-${DSSP_VERSION} + +# INSTALL OST +############# +RUN cd ${SRC_FOLDER} && \ +# copy ost release + wget -O openstructure-${OPENSTRUCTURE_VERSION}.tar.gz -nc https://git.scicore.unibas.ch/schwede/openstructure/repository/${OPENSTRUCTURE_VERSION}/archive.tar.gz && \ + mkdir openstructure-${OPENSTRUCTURE_VERSION} && \ + tar xf openstructure-${OPENSTRUCTURE_VERSION}.tar.gz -C ${SRC_FOLDER}/openstructure-${OPENSTRUCTURE_VERSION} --strip-components=1 && \ + mkdir -p ${SRC_FOLDER}/openstructure-${OPENSTRUCTURE_VERSION}/build +WORKDIR ${SRC_FOLDER}/openstructure-${OPENSTRUCTURE_VERSION}/build + +# cmake ost +RUN cmake .. 
-DPYTHON_LIBRARIES=/usr/lib/x86_64-linux-gnu/libpython2.7.so \ + -DOPTIMIZE=ON \ + -DENABLE_MM=ON \ + -DCOMPILE_TMTOOLS=1 \ + -DUSE_NUMPY=1 \ + -DOPEN_MM_LIBRARY=$OPENMM_LIB_PATH/libOpenMM.so \ + -DOPEN_MM_INCLUDE_DIR=$OPENMM_INCLUDE_PATH \ + -DOPEN_MM_PLUGIN_DIR=$OPENMM_LIB_PATH/plugins \ + -DENABLE_GFX=ON \ + -DENABLE_GUI=ON && \ + # Build chemdict_tool + make -j ${CPUS_FOR_MAKE} chemdict_tool + +# get the compound library +RUN wget ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz +RUN stage/bin/chemdict_tool create components.cif.gz compounds.chemlib pdb && stage/bin/chemdict_tool update modules/conop/data/charmm.cif compounds.chemlib charmm +RUN mkdir -p $OPENSTRUCTURE_SHARE && chmod a+rw -R $OPENSTRUCTURE_SHARE && mv compounds.chemlib $OPENSTRUCTURE_SHARE + +# Build and install OST +RUN cmake .. -DPYTHON_LIBRARIES=/usr/lib/x86_64-linux-gnu/libpython2.7.so \ + -DOPTIMIZE=ON \ + -DENABLE_MM=ON \ + -DCOMPILE_TMTOOLS=1 \ + -DUSE_NUMPY=1 \ + -DOPEN_MM_LIBRARY=$OPENMM_LIB_PATH/libOpenMM.so \ + -DOPEN_MM_INCLUDE_DIR=$OPENMM_INCLUDE_PATH \ + -DOPEN_MM_PLUGIN_DIR=$OPENMM_LIB_PATH/plugins \ + -DENABLE_GFX=ON \ + -DENABLE_GUI=ON \ + -DCOMPOUND_LIB=$OPENSTRUCTURE_SHARE/compounds.chemlib && \ + # Build OST with compound library + make -j ${CPUS_FOR_MAKE} && make check && make install + +# GO HOME AND CLEANUP +##################### +# RUN apt-get purge -y cmake \ +# wget \ +# git \ +# gfortran \ +# python-pip \ +# libbz2-dev \ +# doxygen \ +# swig +# RUN apt-get clean +# RUN apt-get autoremove -y +# +# WORKDIR $SRC_FOLDER +# RUN rm -rf $SRC_FOLDER/* + +WORKDIR /home + +# ENVIRONMENT +############################################################################## +ENV OST_ROOT="/usr/local" +ENV PYTHONPATH="/usr/local/lib64/python2.7/site-packages:${PYTHONPATH}" +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib64" +ENV QT_X11_NO_MITSHM=1 + +ENTRYPOINT ["ost"] +CMD ["-i"] diff --git a/docker/README.rst b/docker/README.rst new file mode 100644 index 
0000000000000000000000000000000000000000..ebed4e994c2aac8045b0f1171fb38948bb19d849 --- /dev/null +++ b/docker/README.rst @@ -0,0 +1,187 @@ +OST Docker +========== + +.. note:: + + For many docker installations it is required to run docker commands as root. As + this depends on the setup, we skip the ``sudo`` in all commands. + +Build Docker image +------------------ + + +In order to build the OST image: + +.. code-block:: bash + + cd <PATH TO OST>/docker + docker build --tag <IMAGE NAME> --build-arg OPENSTRUCTURE_VERSION=<VERSION> -f Dockerfile . + +or if you downloaded the Dockerfile directly: + +.. code-block:: bash + + docker build --tag <IMAGE NAME> --build-arg OPENSTRUCTURE_VERSION=<VERSION> -f <DOCKERFILE NAME> <PATH TO DOCKERFILE DIR> + +You can choose any image name (tag), e.g. ost. The ``OPENSTRUCTURE_VERSION`` +build argument is mandatory and the image will not be built without it. See +`CHANGELOG <https://git.scicore.unibas.ch/schwede/openstructure/blob/master/CHANGELOG.txt>`_ +for the current list of available releases. This is not expected to work for +versions which are much older than the most recent one since the dependencies +might have changed, but it should work for a few versions. If you need the +recipe for an older version, we suggest getting an older recipe from the git +history. + +Testing the image +----------------- + +One can find an exemplary script (``test_docker.py``) in the downloaded directory. +To run it do: + +.. code-block:: + + cd <PATH TO OST>/docker + docker run --rm -v $(pwd):/home <IMAGE NAME> test_docker.py + +As the last line you should see ``OST is working!``. + +Run script and action with OST +------------------------------ + +.. note:: + + If a script or action requires external files, e.g. PDBs, they have to be located in a + path accessible via a mounted volume and should be accessed via the docker (NOT LOCAL) + path. E.g.
assuming that we have a struct.pdb file in the /home/user/pdbs directory and + a script.py in /home/user we could mount the /home/user to /home in docker as + above by specifying -v /home/user:/home. To run the script we thus need to + provide the (relative) path to the script and the (relative) path to the file, e.g.: + + .. code-block:: bash + + docker run --rm -v /home/user:/home <IMAGE NAME> script.py pdbs/struct.pdb + + or with absolute paths: + + .. code-block:: bash + + docker run --rm -v /home/user:/home <IMAGE NAME> /home/script.py /home/pdbs/struct.pdb + + An easy solution to mount the CWD is to use the $(pwd) command in the -v option + of Docker. For an example see the exemplary action run. + The same reasoning is valid for the output files. + +Actions +####### + +To see the list of available actions do: + + .. code-block:: + + docker run --rm <IMAGE NAME> -h + +To run a chosen action do: + + .. code-block:: + + docker run --rm <IMAGE NAME> <ACTION NAME> + + +Here is an example run of the compare-structures action: + +.. code-block:: + + docker run --rm -v $(pwd):/home <IMAGE NAME> compare-structures \ + --model model.pdb \ + --reference reference.pdb \ + --output output.json \ + --qs-score \ + --residue-number-alignment \ + --lddt \ + --structural-checks \ + --consistency-checks \ + --inclusion-radius 15.0 \ + --bond-tolerance 15.0 \ + --angle-tolerance 15.0 \ + --molck \ + --remove oxt hyd unk \ + --clean-element-column \ + --map-nonstandard-residues + + +In order to see all available options for this action run: + +.. code-block:: + + docker run --rm <IMAGE NAME> compare-structures -h + +Scripts +####### + +In order to run an OST script do: + +.. code-block:: bash + + docker run [DOCKER OPTIONS] --rm -v <PATH TO SCRIPT DIR>:/home <IMAGE NAME> /home/<SCRIPT NAME> [SCRIPT OPTIONS] + +Run ost with utility command +############################### + +One can also use the provided utility bash script ``run_docker_ost`` to run basic +scripts and actions: + +..
code-block:: bash + + <PATH TO OST>/docker/run_docker_ost <IMAGE_NAME> [<SCRIPT_PATH>] [SCRIPT OPTIONS] + +One just needs to provide the image name and optionally a script/action and its +options. It is useful to link the command to the binary directory, e.g. on Linux: + +.. code-block:: bash + + ln -s <PATH TO OST>/docker/run_docker_ost /usr/bin/run_docker_ost + +In order to run an exemplary script (``test_docker.py``) do: + +.. code-block:: + + cd <PATH TO OST>/docker + ./run_docker_ost <IMAGE NAME> test_docker.py + +To see the help for the compare-structures action run: + +.. code-block:: + + cd <PATH TO OST>/docker + ./run_docker_ost <IMAGE NAME> compare-structures + +Run GUI +------- + +In order to run the GUI do (tested on a Linux machine): + +.. code-block:: bash + + xhost +local:docker + docker run -ti --rm -e DISPLAY=$DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix --entrypoint dng <IMAGE NAME> + +Running other commands +---------------------- + +The default entrypoint of the Docker image is "ost", thus in order to run other +available commands (or other commands in general) one needs to override +the entrypoint: + +.. code-block:: + + docker run --rm -ti --entrypoint <COMMAND> <IMAGE NAME> [COMMAND OPTIONS] + +E.g. to run molck type: + +.. code-block:: + + docker run --rm -ti --entrypoint molck <IMAGE NAME> --help + +.. note:: + + Note how the options to the command are specified after the image name.
diff --git a/docker/run_docker_ost b/docker/run_docker_ost new file mode 100755 index 0000000000000000000000000000000000000000..9930329a2a6b6d8f0ebf381287c2c38deda8925d --- /dev/null +++ b/docker/run_docker_ost @@ -0,0 +1,28 @@ +#!/bin/bash + +# end when error +set -e + +image_name=$1 +script_path=$2 + +if [[ ${#@} -lt 1 ]]; then + echo "Usage: run_docker_ost <IMAGE_NAME> [<SCRIPT_PATH>]" + exit 1 +fi + +if [[ -z ${script_path} ]]; then + docker run -ti --rm -v $(pwd):/home ${image_name} +else + if [[ -e $script_path ]]; then + abspath=$(readlink -f $script_path) + dirpath=$(dirname $abspath) + name=$(basename $script_path) + docker run --rm -v ${dirpath}:/home ${image_name} /home/${name} ${@:3} + else + # it is maybe an action if it does not exist + docker run --rm -v $(pwd):/home ${image_name} ${script_path} ${@:3} + fi +fi + + diff --git a/docker/test_docker.py b/docker/test_docker.py new file mode 100644 index 0000000000000000000000000000000000000000..e191217e63f9f8de3ff979e54c787a422428394f --- /dev/null +++ b/docker/test_docker.py @@ -0,0 +1,19 @@ +import ost +from ost.mol.alg import qsscoring + +# load two biounits to compare +ent_full = ost.io.LoadPDB('3ia3', remote=True) +ent_1 = ent_full.Select('cname=A,D') +ent_2 = ent_full.Select('cname=B,C') +# get score +ost.PushVerbosityLevel(3) +try: + qs_scorer = qsscoring.QSscorer(ent_1, ent_2) + ost.LogScript('QSscore:', str(qs_scorer.global_score)) + ost.LogScript('Chain mapping used:', str(qs_scorer.chain_mapping)) +except qsscoring.QSscoreError as ex: + # default handling: report failure and set score to 0 + ost.LogError('QSscore failed:', str(ex)) + qs_score = 0 + +print "OST is working!" 
diff --git a/modules/base/src/test_utils/compare_files.cc b/modules/base/src/test_utils/compare_files.cc index 904b14a7fa75dde9bf4edcf3cab176966f312dc1..13dac63adcd4a2f8244303f66d187cadade08ba3 100644 --- a/modules/base/src/test_utils/compare_files.cc +++ b/modules/base/src/test_utils/compare_files.cc @@ -37,17 +37,18 @@ bool compare_files(const String& test, const String& gold_standard) } String test_line, gold_line; while (true) { - bool test_end=std::getline(test_stream, test_line) != 0; - bool gold_end=std::getline(gold_stream, gold_line) != 0; - if (!(test_end || gold_end)) { + bool test_read = static_cast<bool>(std::getline(test_stream, test_line)); + bool gold_read = static_cast<bool>(std::getline(gold_stream, gold_line)); + if (!test_read && !gold_read) { + // nothing to read anymore in any of the files return true; } - if (!test_end) { + if (gold_read && !test_read) { std::cerr << gold_standard << " contains additional line(s):" << std::endl << gold_line << std::endl; return false; } - if (!gold_end) { + if (test_read && !gold_read) { std::cerr << test << " contains additional line(s):" << std::endl << test_line << std::endl; return false; diff --git a/modules/bindings/doc/hhblits.rst b/modules/bindings/doc/hhblits.rst index c822d5bd0641d1fc6a863066f4196e3fe77f467a..9ffb1c0c6f5d81b2a35b4299e54a30165bf656e1 100644 --- a/modules/bindings/doc/hhblits.rst +++ b/modules/bindings/doc/hhblits.rst @@ -1,9 +1,6 @@ :mod:`~ost.bindings.hhblits` - Search related sequences in databases ================================================================================ -.. module:: ost.bindings.hhblits - :synopsis: Search related sequences in databases - Introduction -------------------------------------------------------------------------------- @@ -15,7 +12,7 @@ one is provided, queried with a sequence profile. The latter one needs to be calculated before the actual search. 
In very simple words, HHblits is using per-sequence scoring functions to be more sensitive, in this particular case Hidden Markov models. The software suite needed for HHblits can be found -`here <http://toolkit.tuebingen.mpg.de/hhblits>`_. +`here <http://wwwuser.gwdg.de/~compbiol/data/hhsuite/releases/all/>`_. Examples @@ -59,6 +56,9 @@ First query by sequence: for hit in hits: print hit.aln + # cleanup + hh.Cleanup() + Very similar going by file: .. code-block:: python @@ -84,6 +84,9 @@ Very similar going by file: for hit in hits: print hit.aln + # cleanup + hh.Cleanup() + The alignments produced by HHblits are sometimes slightly better than by BLAST, so one may want to extract them: @@ -105,24 +108,15 @@ so one may want to extract them: print output['msa'] + # cleanup + hh.Cleanup() + Binding API -------------------------------------------------------------------------------- -.. autoclass:: ost.bindings.hhblits.HHblits +.. automodule:: ost.bindings.hhblits + :synopsis: Search related sequences in databases :members: -.. autoclass:: ost.bindings.hhblits.HHblitsHit - -.. autoclass:: ost.bindings.hhblits.HHblitsHeader - -.. autofunction:: ost.bindings.hhblits.ParseHHblitsOutput - -.. autofunction:: ost.bindings.hhblits.ParseA3M - -.. autofunction:: ost.bindings.hhblits.ParseHeaderLine - -.. autofunction:: ost.bindings.hhblits.ParseHHM - -.. autofunction:: ost.bindings.hhblits.EstimateMemConsumption .. LocalWords: HHblits homologs diff --git a/modules/bindings/pymod/clustalw.py b/modules/bindings/pymod/clustalw.py index e5400d1ac58d9d621ff27fa1b403b7e6421a7f57..29af869f445f1c1353c2673770048125c507a4fc 100644 --- a/modules/bindings/pymod/clustalw.py +++ b/modules/bindings/pymod/clustalw.py @@ -6,7 +6,7 @@ import subprocess def ClustalW(seq1, seq2=None, clustalw=None, keep_files=False, nopgap=False, clustalw_option_string=False): ''' - Runs a clustalw multiple sequence alignment. The results are returned as a + Runs a ClustalW multiple sequence alignment. 
The results are returned as a :class:`~ost.seq.AlignmentHandle` instance. There are two ways to use this function: @@ -36,17 +36,25 @@ def ClustalW(seq1, seq2=None, clustalw=None, keep_files=False, nopgap=False, parameter (`seq1`). The second parameter (`seq2`) must be :class:`None`. - :param clustalw: path to clustalw executable (used in :func:`~ost.settings.Locate`) + :param clustalw: path to ClustalW executable (used in :func:`~ost.settings.Locate`) :type clustalw: :class:`str` :param nopgap: turn residue-specific gaps off :type nopgap: :class:`bool` - :param clustalw_option_string: additional clustalw flags (see http://toolkit.tuebingen.mpg.de/clustalw/help_params) + :param clustalw_option_string: additional ClustalW flags (see http://www.clustal.org/download/clustalw_help.txt) :type clustalw_option_string: :class:`str` :param keep_files: do not delete temporary files :type keep_files: :class:`bool` - Note: ClustalW will convert lowercase to uppercase, and change all '.' to '-'. - OST will convert and '?' to 'X' before aligning sequences with Clustalw. + .. note :: + + - In the passed sequences ClustalW will convert lowercase to uppercase, and + change all '.' to '-'. OST will convert and '?' to 'X' before aligning + sequences with ClustalW. + - If a :attr:`sequence name <ost.seq.SequenceHandle.name>` contains spaces, + only the part before the space is considered as sequence name. To avoid + surprises, you should remove spaces from the sequence name. + - Sequence names must be unique (:class:`ValueError` exception raised + otherwise). 
ClustalW will accept only IUB/IUPAC amino acid and nucleic acid codes: @@ -94,7 +102,8 @@ def ClustalW(seq1, seq2=None, clustalw=None, keep_files=False, nopgap=False, sequence_names = set() for s in seq_list: - sequence_names.add(s.GetName()) + # we cut out anything after a space to be consistent with ClustalW behaviour + sequence_names.add(s.GetName().split(' ')[0]) if len(sequence_names) < len(seq_list): raise ValueError("ClustalW can only process sequences with unique identifiers!") diff --git a/modules/bindings/pymod/hhblits.py b/modules/bindings/pymod/hhblits.py index dddaa7404688f414eaa1a3915c6141b049ff52ce..0a1045171c167a5e24e958d4256739443b37970b 100644 --- a/modules/bindings/pymod/hhblits.py +++ b/modules/bindings/pymod/hhblits.py @@ -1,4 +1,4 @@ -'''HHblits wrapper. +'''HHblits wrapper classes and functions. ''' import subprocess @@ -124,8 +124,8 @@ def ParseHeaderLine(line): :param line: Line from the output header. :type line: :class:`str` - :return: Hit information - :rtype: :class:`HHblitsHit` + :return: Hit information and query/template offsets + :rtype: (:class:`HHblitsHit`, (:class:`int`, :class:`int`)) ''' for i in range(0, len(line)): if line[i].isdigit(): @@ -147,15 +147,14 @@ def ParseHeaderLine(line): def ParseHHblitsOutput(output): """ - Parses the HHblits output and returns a tuple of :class:`HHblitsHeader` and - a list of :class:`HHblitsHit` instances. + Parses the HHblits output as produced by :meth:`HHblits.Search` and returns + the header of the search results and a list of hits. - :param output: output of a :meth:`HHblits.Search`, needs to be iteratable, - e.g. an open file handle - :type output: :class:`file`/ iteratable + :param output: Iterable containing the lines of the HHblits output file + :type output: iterable (e.g. 
an open file handle) :return: a tuple of the header of the search results and the hits - :rtype: (:class:`HHblitsHeader`, :class:`HHblitsHit`) + :rtype: (:class:`HHblitsHeader`, :class:`list` of :class:`HHblitsHit`) """ lines = iter(output) def _ParseHeaderSection(lines): @@ -216,6 +215,12 @@ def ParseHHblitsOutput(output): return seq.CreateAlignment(s1, s2) try: while True: + # Lines which we are interested in: + # - "Done!" -> end of list + # - "No ..." -> next item in list + # - "T <hit_id> <start> <data> <end>" + # - "Q <query_id> <start> <data> <end>" + # -> rest is to be skipped line = lines.next() if len(line.strip()) == 0: continue @@ -239,19 +244,30 @@ def ParseHHblitsOutput(output): lines.next() continue assert entry_index != None + # Skip all "T ..." and "Q ..." lines besides the one we want if line[1:].startswith(' Consensus'): continue if line[1:].startswith(' ss_pred'): continue if line[1:].startswith(' ss_conf'): continue + if line[1:].startswith(' ss_dssp'): + continue if line.startswith('T '): end_pos = line.find(' ', 22) - assert end_pos != -1 + # this can fail if we didn't skip all other "T ..." lines + if end_pos == -1: + error_str = "Unparsable line '%s' for entry No %d" \ + % (line.strip(), entry_index + 1) + raise AssertionError(error_str) templ_str += line[22:end_pos] if line.startswith('Q '): end_pos = line.find(' ', 22) - assert end_pos != -1 + # this can fail if we didn't skip all other "Q ..." lines + if end_pos == -1: + error_str = "Unparsable line '%s' for entry No %d" \ + % (line.strip(), entry_index + 1) + raise AssertionError(error_str) query_str += line[22:end_pos] except StopIteration: if len(query_str) > 0: @@ -271,10 +287,10 @@ def ParseHHblitsOutput(output): def ParseA3M(a3m_file): ''' Parse secondary structure information and the multiple sequence alignment - out of an A3M file. + out of an A3M file as produced by :meth:`HHblits.BuildQueryMSA`. 
- :param a3m_file: Iteratable containing the lines of the A3M file - :type a3m_file: iteratable, e.g. an opened file + :param a3m_file: Iterable containing the lines of the A3M file + :type a3m_file: iterable (e.g. an open file handle) :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf" (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`). @@ -323,36 +339,36 @@ def ParseA3M(a3m_file): t = msa_seq[0] al = seq.AlignmentList() for i in range(1, len(msa_seq)): - qs = '' - ts = '' - k = 0 - for c in msa_seq[i]: - if c.islower(): - qs += '-' - ts += c.upper() - else: - qs += t[k] - ts += c - k += 1 - nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs), - seq.CreateSequence(msa_head[i], ts)) - al.append(nl) + qs = '' + ts = '' + k = 0 + for c in msa_seq[i]: + if c.islower(): + qs += '-' + ts += c.upper() + else: + qs += t[k] + ts += c + k += 1 + nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs), + seq.CreateSequence(msa_head[i], ts)) + al.append(nl) profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\ - al, - seq.CreateSequence(msa_head[0], - t)) + al, seq.CreateSequence(msa_head[0], t)) return profile_dict def ParseHHM(profile): - '''Parse secondary structure information and the MSA out of an HHM profile. + ''' + Parse secondary structure information and the MSA out of an HHM profile as + produced by :meth:`HHblits.A3MToProfile`. :param profile: Opened file handle holding the profile. :type profile: :class:`file` :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf" (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and - "consensus" (~ost.seq.SequenceHandle). + "consensus" (:class:`~ost.seq.SequenceHandle`). 
''' profile_dict = dict() state = 'NONE' @@ -423,25 +439,13 @@ def ParseHHM(profile): seq.CreateSequence(msa_head[i], ts)) al.append(nl) profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\ - al, - seq.CreateSequence(msa_head[0], t)) - #print profile_dict['msa'].ToString(80) + al, seq.CreateSequence(msa_head[0], t)) + #print profile_dict['msa'].ToString(80) # Consensus profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt) return profile_dict -def EstimateMemConsumption(): - """ - Estimate the memory needed by HHblits. By default it uses not more than 3G. - Also for small sequences it already uses quite some memnmory (46AA, 1.48G). - And since the memory consumption could depend on the iterative search runs, - how many hits are found in each step, we just go with 4G, here. - - :return: Assumed memory consumtion - :rtype: (:class:`float`, :class:`str`) - """ - return 4.0, 'G' class HHblits: """ @@ -461,7 +465,6 @@ class HHblits: :param working_dir: Directory for temporary files. Will be created if not present but **not** automatically deleted. :type working_dir: :class:`str` - """ OUTPUT_PREFIX = 'query_hhblits' def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None): @@ -474,6 +477,8 @@ class HHblits: self.hhblits_bin = settings.Locate('hhblits', explicit_file_name=hhblits_bin) self.bin_dir = os.path.dirname(self.hhblits_bin) + # guess root folder (note: this may fail in future) + self.hhsuite_root = os.path.dirname(self.bin_dir) self.hhlib_dir = os.path.join(self.hhsuite_root, 'lib', 'hh') if working_dir: self.needs_cleanup = False @@ -501,63 +506,63 @@ class HHblits: self.working_dir = tmp_dir.dirname self.filename = tmp_dir.files[0] - def Cleanup(self): - """Delete temporary data. + def BuildQueryMSA(self, nrdb, options={}, a3m_file=None): + """Builds the MSA for the query sequence. - Delete temporary data if no working dir was given. Controlled by - :attr:`needs_cleanup`. 
- """ - if self.needs_cleanup and os.path.exists(self.working_dir): - shutil.rmtree(self.working_dir) + This function directly uses hhblits of hhtools. While in theory it would + be possible to do this by PSI-blasting on our own, hhblits is supposed + to be faster. Also it is supposed to prevent alignment corruption. The + alignment corruption is caused by low-scoring terminal alignments that + draw the sequences found by PSI-blast away from the optimum. By removing + these low scoring ends, part of the alignment corruption can be + suppressed. - def BuildQueryMSA(self, nrdb, iterations=1, mact=None, cpu=1): - """Builds the MSA for the query sequence + hhblits does **not** call PSIPRED on the MSA to predict the secondary + structure of the query sequence. This is done by addss.pl of hhtools. + The predicted secondary structure is stored together with the sequences + identified by hhblits. - This function directly uses hhblits of hhtools. While in theory it - would be possible to do this by PSI-blasting on our own, hhblits is - supposed to be faster. Also it is supposed to prevent alignment - corruption. The alignment corruption is caused by low-scoring terminal - alignments that draw the sequences found by PSI-blast away from the - optimum. By removing these low scoring ends, part of the alignment - corruption can be suppressed. hhblits does **not** call PSIPRED on the - MSA to predict the secondary structure of the query sequence. This is - done by addss.pl of hhtools. The predicted secondary structure is - stored together with the sequences identified by hhblits. + The produced A3M file can be parsed by :func:`ParseA3M`. If the file was + already produced, hhblits is not called again and the existing file path + is returned. 
:param nrdb: Database to be align against; has to be an hhblits database :type nrdb: :class:`str` - :param iterations: Number of hhblits iterations - :type iterations: :class:`int` - - :param mact: ``-mact`` of hhblits - :type mact: :class:`float` + :param options: Dictionary of options to *hhblits*, one "-" is added in + front of every key. Boolean True values add flag without + value. Merged with default options {'cpu': 1, 'n': 1}, + where 'n' defines the number of iterations. + :type options: :class:`dict` - :param cpu: ``-cpu`` of hhblits - :type cpu: :class:`int` + :param a3m_file: a path of a3m_file to be used, optional + :type a3m_file: :class:`str` - :return: the path to the MSA file + :return: The path to the A3M file containing the MSA :rtype: :class:`str` """ - a3m_file = '%s.a3m' % os.path.splitext(self.filename)[0] + if a3m_file is None: + a3m_file = '%s.a3m' % os.path.splitext(self.filename)[0] + if os.path.exists(a3m_file): + ost.LogInfo('Reusing already existing query alignment (%s)' % a3m_file) + return a3m_file ost.LogInfo('Using hhblits from "%s"' % self.hhsuite_root) full_nrdb = os.path.join(os.path.abspath(os.path.split(nrdb)[0]), os.path.split(nrdb)[1]) # create MSA - hhblits_cmd = '%s -e 0.001 -cpu %d -i %s -oa3m %s -d %s -n %d' % \ - (self.hhblits_bin, cpu, self.filename, a3m_file, - full_nrdb, iterations) - if mact: - hhblits_cmd += '-mact %f' % mact + opts = {'cpu' : 1, # no. of cpus used + 'n' : 1} # no. 
of iterations + opts.update(options) + opt_cmd, _ = _ParseOptions(opts) + hhblits_cmd = '%s -e 0.001 -i %s -oa3m %s -d %s %s' % \ + (self.hhblits_bin, self.filename, a3m_file, full_nrdb, + opt_cmd) job = subprocess.Popen(hhblits_cmd, shell=True, cwd=self.working_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) sout, _ = job.communicate() - #lines = sout.splitlines() - #for l in lines: - # print l.strip() - #lines = serr.splitlines() - #for l in lines: - # print l.strip() + lines = sout.splitlines() + for line in lines: + ost.LogVerbose(line.strip()) if not os.path.exists(a3m_file): ost.LogWarning('Building query profile failed, no output') return a3m_file @@ -580,7 +585,7 @@ class HHblits: if 'error' in line.lower(): ost.LogWarning('Predicting secondary structure for MSA '+ '(%s) failed, on command: %s' % (a3m_file, line)) - return a3m_file + return a3m_file return a3m_file def A3MToProfile(self, a3m_file, hhm_file=None): @@ -588,13 +593,18 @@ class HHblits: Converts the A3M alignment file to a hhm profile. If hhm_file is not given, the output file will be set to <:attr:`a3m_file`-basename>.hhm. - :param a3m_file: input MSA + The produced HHM file can be parsed by :func:`ParseHHM`. + + If the file was already produced, the existing file path is returned + without recomputing it. + + :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA` :type a3m_file: :class:`str` - :param hhm_file: output file name + :param hhm_file: Desired output file name :type hhm_file: :class:`str` - :return: the path to the profile + :return: Path to the profile file :rtype: :class:`str` """ hhmake = os.path.join(self.bin_dir, 'hhmake') @@ -609,24 +619,27 @@ class HHblits: raise IOError('could not convert a3m to hhm file') return hhm_file - def A3MToCS(self, a3m_file, cs_file=None, options={}): """ Converts the A3M alignment file to a column state sequence file. If cs_file is not given, the output file will be set to <:attr:`a3m_file`-basename>.seq219. 
- :param a3m_file: A3M file to be converted + If the file was already produced, the existing file path is returned + without recomputing it. + + :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA` :type a3m_file: :class:`str` - :param cs_file: output file name (may be omitted) + :param cs_file: Output file name (may be omitted) :type cs_file: :class:`str` - :param options: dictionary of options to *cstranslate*, must come with - the right amount of '-' in front. + :param options: Dictionary of options to *cstranslate*, one "-" is added + in front of every key. Boolean True values add flag + without value. :type options: :class:`dict` - :return: the path to the column state sequence file + :return: Path to the column state sequence file :rtype: :class:`str` """ cstranslate = os.path.join(self.hhlib_dir, 'bin', 'cstranslate') @@ -634,31 +647,36 @@ class HHblits: cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0] if os.path.exists(cs_file): return cs_file - opt_cmd = list() - for k, val in options.iteritems(): - if type(val) == type(True): - if val == True: - opt_cmd.append('%s' % str(k)) - else: - opt_cmd.append('%s %s' % (str(k), str(val))) - opt_cmd = ' '.join(opt_cmd) - cs_cmd = '%s -i %s -o %s %s' % (cstranslate, a3m_file, cs_file, opt_cmd) + opt_cmd, _ = _ParseOptions(options) + cs_cmd = '%s -i %s -o %s %s' % ( + cstranslate, + os.path.abspath(a3m_file), + os.path.abspath(cs_file), + opt_cmd) ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file)) - job = subprocess.Popen(cs_cmd, shell=True, + job = subprocess.Popen(cs_cmd, shell=True, cwd=self.working_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) sout, _ = job.communicate() - #lines = serr.splitlines() - #for l in lines: - # print l lines = sout.splitlines() for line in lines: - if line in 'Wrote abstract state sequence to %s' % cs_file: + if 'Wrote abstract state sequence to' in line: return cs_file ost.LogWarning('Creating column state sequence file (%s) failed' % \ cs_file) 
+ def Cleanup(self): + """Delete temporary data. + + Delete temporary data if no working dir was given. Controlled by + :attr:`needs_cleanup`. + """ + if self.needs_cleanup and os.path.exists(self.working_dir): + shutil.rmtree(self.working_dir) + def CleanupFailed(self): '''In case something went wrong, call to make sure everything is clean. + + This will delete the working dir independently of :attr:`needs_cleanup`. ''' store_needs_cleanup = self.needs_cleanup self.needs_cleanup = True @@ -667,50 +685,41 @@ class HHblits: def Search(self, a3m_file, database, options={}, prefix=''): """ - Searches for templates in the given database. Before running the - search, the hhm file is copied. This makes it possible to launch - several hhblits instances at once. Upon success, the filename of the - result file is returned. This file may be parsed with - :func:`ParseHHblitsOutput`. + Searches for templates in the given database. Before running the search, + the hhm file is copied. This makes it possible to launch several hhblits + instances at once. Upon success, the filename of the result file is + returned. This file may be parsed with :func:`ParseHHblitsOutput`. - :param a3m_file: input MSA file + :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA` :type a3m_file: :class:`str` - :param database: search database, needs to be the common prefix of the + :param database: Search database, needs to be the common prefix of the database files :type database: :class:`str` - :param options: dictionary of options, must come with the right amount - of '-' in front. + :param options: Dictionary of options to *hhblits*, one "-" is added in + front of every key. Boolean True values add flag without + value. Merged with default options {'cpu': 1, 'n': 1}, + where 'n' defines the number of iterations. 
:type options: :class:`dict` - :param prefix: prefix to the result file + :param prefix: Prefix to the result file :type prefix: :class:`str` - :return: the path to the result file + :return: The path to the result file :rtype: :class:`str` """ opts = {'cpu' : 1, # no. of cpus used 'n' : 1} # no. of iterations opts.update(options) - opt_cmd = [] - opt_str = [] - for k, val in opts.iteritems(): - if type(val) == type(True): - if val == True: - opt_cmd.append('-%s' % str(k)) - opt_str.append(str(k)) - else: - opt_cmd.append('-%s %s' % (str(k), str(val))) - opt_str.append('%s%s' % (str(k), str(val))) - opt_cmd = ' '.join(opt_cmd) - opt_str = '_'.join(opt_str) + opt_cmd, opt_str = _ParseOptions(opts) base = os.path.basename(os.path.splitext(a3m_file)[0]) hhr_file = '%s%s_%s.hhr' % (prefix, base, opt_str) hhr_file = os.path.join(self.working_dir, hhr_file) - search_cmd = '%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s'%( + search_cmd = '%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s' % ( self.hhblits_bin, - opt_cmd, os.path.abspath(a3m_file), + opt_cmd, + os.path.abspath(a3m_file), hhr_file, os.path.join(os.path.abspath(os.path.split(database)[0]), os.path.split(database)[1])) @@ -722,20 +731,44 @@ class HHblits: if job.returncode != 0: lines = sout.splitlines() for line in lines: - print line.strip() + ost.LogError(line.strip()) lines = serr.splitlines() for line in lines: - print line.strip() + ost.LogError(line.strip()) return None return hhr_file -__all__ = ['HHblits', 'HHblitsHit', 'HHblitsHeader', 'ParseHeaderLine', +def _ParseOptions(opts): + """ + :return: Tuple of strings (opt_cmd, opt_str), where opt_cmd can be + passed to command ("-" added in front of keys, options + separated by space) and opt_str (options separated by "_") + can be used for filenames. + :param opts: Dictionary of options, one "-" is added in front of every + key. Boolean True values add flag without value. 
+ """ + opt_cmd = list() + opt_str = list() + for k, val in opts.iteritems(): + if type(val) == type(True): + if val == True: + opt_cmd.append('-%s' % str(k)) + opt_str.append(str(k)) + else: + opt_cmd.append('-%s %s' % (str(k), str(val))) + opt_str.append('%s%s' % (str(k), str(val))) + opt_cmd = ' '.join(opt_cmd) + opt_str = '_'.join(opt_str) + return opt_cmd, opt_str + + +__all__ = ['HHblits', 'HHblitsHit', 'HHblitsHeader', 'ParseHHblitsOutput', 'ParseA3M', 'ParseHHM', - 'EstimateMemConsumption'] + 'ParseHeaderLine'] -# LocalWords: HHblits MSA hhblits hhtools PSIPRED addss param nrdb str mact +# LocalWords: HHblits MSA hhblits hhtools PSIPRED addss param nrdb str # LocalWords: cpu hhm func ParseHHblitsOutput ss pred conf msa hhsuite dir # LocalWords: attr basename rtype cstranslate tuple HHblitsHeader meth aln -# LocalWords: HHblitsHit iteratable evalue pvalue neff hmms datetime +# LocalWords: HHblitsHit iterable evalue pvalue neff hmms datetime # LocalWords: whitespace whitespaces diff --git a/modules/bindings/pymod/utils.py b/modules/bindings/pymod/utils.py index 02edd598f9b664c165f385b92429b619d7bf09fd..e476d04a9a752c64f86cf0f0e4d0a4418cfa405c 100644 --- a/modules/bindings/pymod/utils.py +++ b/modules/bindings/pymod/utils.py @@ -29,7 +29,7 @@ def SaveToTempDir(objects, seq_format='fasta', structure_format='pdb'): file_names.append(name) continue if isinstance(obj, mol.EntityView) or isinstance(obj, mol.EntityHandle): - name=os.path.join(tmp_dir_name, tmp_dir_name, 'mol%02d.pdb' % (index+1)) + name=os.path.join(tmp_dir_name, 'mol%02d.pdb' % (index+1)) io.SaveEntity(obj, name, structure_format) file_names.append(name) continue diff --git a/modules/bindings/tests/test_clustalw.py b/modules/bindings/tests/test_clustalw.py index f5625ce9bc4636aa7e98071bad5021c3d22dc256..8f295088ed235ab3ad401eff4525b985df51280d 100644 --- a/modules/bindings/tests/test_clustalw.py +++ b/modules/bindings/tests/test_clustalw.py @@ -63,11 +63,13 @@ class 
TestClustalWBindings(unittest.TestCase): "Pairwise alignment with modified gap penalties differs from precomputed one" def testUniqueIdentifier(self): - seq1 = seq.CreateSequence('heelloo','AWESOME') - seq2 = seq.CreateSequence('heelloo','AWESOME') - - self.assertRaises(ValueError,clustalw.ClustalW,seq1,seq2) - + # common case + seq1 = seq.CreateSequence('heelloo', 'AWESOME') + seq2 = seq.CreateSequence('heelloo', 'AWESOME') + self.assertRaises(ValueError, clustalw.ClustalW, seq1, seq2) + # nasty case with spaces + seq2 = seq.CreateSequence('heelloo dear', 'AWESOME') + self.assertRaises(ValueError, clustalw.ClustalW, seq1, seq2) if __name__ == "__main__": # test if clustalw package is available on system, otherwise ignore tests diff --git a/modules/bindings/tests/test_hhblits.py b/modules/bindings/tests/test_hhblits.py index c396528b8c623d620e07f7611431760afcfd1725..2ba099ced0c4e7f5dd7d0856431cd48d22a326b2 100644 --- a/modules/bindings/tests/test_hhblits.py +++ b/modules/bindings/tests/test_hhblits.py @@ -196,7 +196,7 @@ class TestHHblitsBindings(unittest.TestCase): _, self.tmpfile = tempfile.mkstemp(suffix='.seq219') os.remove(self.tmpfile) csfile = self.hh.A3MToCS("testfiles/testali.a3m", - cs_file=self.tmpfile, options={'--alphabet' : + cs_file=self.tmpfile, options={'-alphabet' : os.path.join(self.hh.hhlib_dir, 'data', 'cs219.lib')}) @@ -211,7 +211,7 @@ class TestHHblitsBindings(unittest.TestCase): 'TSKYR') self.hh = hhblits.HHblits(query_seq, self.hhroot) csfile = self.hh.A3MToCS("testfiles/testali.a3m", - options={'--alphabet' : + options={'-alphabet' : os.path.join(self.hh.hhlib_dir, 'data', 'cs219.lib')}) @@ -228,7 +228,7 @@ class TestHHblitsBindings(unittest.TestCase): self.hh = hhblits.HHblits(query_seq, self.hhroot) csfile = self.hh.A3MToCS("testfiles/testali.a3m", cs_file='testfiles/test.seq219', - options={'--alphabet' : + options={'-alphabet' : os.path.join(self.hh.hhlib_dir, 'data', 'cs219.lib')}) @@ -305,6 +305,17 @@ class 
TestHHblitsBindings(unittest.TestCase): 'HHHHHHHHHHHHCC') self.assertEqual(prof['msa'].GetCount(), 253) + def fastParseHeader(self): + header_line = ' 1 814cbc1899f35c872169524af30fc2 100.0 5E-100' + \ + ' 5E-104 710.5 34.1 277 3-293 2-280 (281)' + hit, offset = hhblits.ParseHeaderLine(header_line) + self.assertEqual(hit.hit_id, '814cbc1899f35c872169524af30fc2') + self.assertAlmostEqual(hit.evalue, 0) + self.assertAlmostEqual(hit.prob, 100.0) + self.assertAlmostEqual(hit.pvalue, 0) + self.assertAlmostEqual(hit.score, 710.5) + self.assertAlmostEqual(hit.ss_score, 34.1) + def testParseHHblitsOutput(self): header, hits = hhblits.ParseHHblitsOutput(open("testfiles/test.hhr")) self.assertEqual(header.query, 'Test') @@ -385,8 +396,6 @@ class TestHHblitsBindings(unittest.TestCase): 'Test VDPVNFKLLSHCLLVTLAAHL\ne69e1ac0'+ 'a4b2554d... ATPEQAQLVHKEIRKIVKDTC\n') -# ParseHHblitsOutput - if __name__ == "__main__": hhsuite_root_dir = os.getenv('EBROOTHHMINSUITE') if not hhsuite_root_dir: diff --git a/modules/config/base.hh b/modules/config/base.hh index 2019437b5b47828492ec8de2422a77287d650092..b10638e7ede8f4a6f301aad7db6dd8d05b243bc0 100644 --- a/modules/config/base.hh +++ b/modules/config/base.hh @@ -51,28 +51,15 @@ typedef unsigned int uint; typedef std::complex<Real> Complex; typedef unsigned short Word; +typedef std::string String; +// NOTE: Before OST 1.8, we used to have round and rint functions defined here +// -> round and rint are available for any compiler since many years now +// -> Tested for GCC 4.1.2 - 9.0.0, clang 3.3.0 - 8.0.0, MSVC 2015 - 2017 using +// godbolt.org. In all cases: call with float is not casted to double, but +// kept as float which is desired behaviour for good performance. 
-#ifndef round_function -#define round_function -#ifndef round -inline Real round( Real d ) -{ - return floor(d+Real(0.5)); -} -#endif -#endif - -#ifndef rint_function -#define rint_function -#ifndef rint -inline Real rint(Real d) -{ - return floor(d+Real(0.5)); -} -#endif -#endif - +// NOTE: OST has not been tested in MSVC for a very long time! #if _MSC_VER #pragma warning(disable:4251) #pragma warning(disable:4275) @@ -104,7 +91,4 @@ inline double log2( double n ) #endif -typedef std::string String; - - #endif diff --git a/modules/conop/doc/aminoacid.rst b/modules/conop/doc/aminoacid.rst index de15eb98a91c3fe53a9008c20d314d4aa8954edc..a33104ca6b91085b63a3a0df02b100ebd1d9c2d7 100644 --- a/modules/conop/doc/aminoacid.rst +++ b/modules/conop/doc/aminoacid.rst @@ -79,63 +79,4 @@ Converter functions .. method:: Empty() - Whether the set is empty, i.e. doesn't contain any amino acids. - - -Mapping functions --------------------------------------------------------------------------------- - -The following functions help to convert one residue into another by reusing as -much as possible from the present atoms. They are mainly meant to map from -standard amino acid to other standard amino acids or from modified amino acids -to standard amino acids. - -.. function:: CopyResidue(src_res, dst_res, editor) - - Copies the atoms of ``src_res`` to ``dst_res`` using the residue names - as guide to decide which of the atoms should be copied. If ``src_res`` and - ``dst_res`` have the same name, or ``src_res`` is a modified version of - ``dst_res`` (i.e. have the same single letter code), CopyConserved will be - called, otherwise CopyNonConserved will be called. - - :param src_res: The source residue - :type src_res: :class:`~ost.mol.ResidueHandle` - :param dst_res: The destination residue - :type dst_res: :class:`~ost.mol.ResidueHandle` - - :returns: true if the residue could be copied, false if not. - -.. 
function:: CopyConserved(src_res, dst_res, editor) - - Copies the atoms of ``src_res`` to ``dst_res`` assuming that the parent - amino acid of ``src_res`` (or ``src_res`` itself) are identical to ``dst_res``. - - If ``src_res`` and ``dst_res`` are identical, all heavy atoms are copied - to ``dst_res``. If ``src_res`` is a modified version of ``dst_res`` and the - modification is a pure addition (e.g. the phosphate group of phosphoserine), - the modification is stripped off and all other heavy atoms are copied to - ``dst_res``. If the modification is not a pure addition, only the backbone - heavy atoms are copied to ``dst_res``. - - Additionally, the selenium atom of ``MSE`` is converted to sulphur. - - :param src_res: The source residue - :type src_res: :class:`~ost.mol.ResidueHandle` - :param dst_res: The destination residue - :type dst_res: :class:`~ost.mol.ResidueHandle` - - :returns: a tuple of bools stating whether the residue could be copied and - whether the Cbeta atom was inserted into the ``dst_res``. - -.. function:: CopyNonConserved(src_res, dst_res, editor) - - Copies the heavy backbone atoms and Cbeta (except for ``GLY``) of ``src_res`` - to ``dst_res``. - - :param src_res: The source residue - :type src_res: :class:`~ost.mol.ResidueHandle` - :param dst_res: The destination residue - :type dst_res: :class:`~ost.mol.ResidueHandle` - - :returns: a tuple of bools stating whether the residue could be copied and - whether the Cbeta atom was inserted into the ``dst_res``. + Whether the set is empty, i.e. doesn't contain any amino acids. \ No newline at end of file diff --git a/modules/conop/doc/compoundlib.rst b/modules/conop/doc/compoundlib.rst index caca8445b454408a8b103e96129e847e71045d55..41f859e7a8787499553fad1b1639e0827379c60c 100644 --- a/modules/conop/doc/compoundlib.rst +++ b/modules/conop/doc/compoundlib.rst @@ -45,7 +45,7 @@ build the compound library manually. write mode, the programs can deadlock. 
:type readonly: :class:`bool` - :returns: The loaded compound lib + :returns: The loaded compound lib or None if it failed. .. staticmethod:: Create(database) diff --git a/modules/conop/pymod/CMakeLists.txt b/modules/conop/pymod/CMakeLists.txt index 9c09906222fb97e1c935590ec22e81b536c94891..23dff26a0022fa9c312c9bfd1acda033d1c304b7 100644 --- a/modules/conop/pymod/CMakeLists.txt +++ b/modules/conop/pymod/CMakeLists.txt @@ -8,7 +8,6 @@ set(OST_CONOP_PYMOD_SOURCES export_conop.cc export_diag.cc export_rule_based.cc - export_non_standard.cc export_ring_finder.cc ) diff --git a/modules/conop/pymod/wrap_conop.cc b/modules/conop/pymod/wrap_conop.cc index 119c86c325f3ce6fc48cf88298838f441cb04a36..04bcfce147b3ebf986cee454c2594e242ade7059 100644 --- a/modules/conop/pymod/wrap_conop.cc +++ b/modules/conop/pymod/wrap_conop.cc @@ -25,7 +25,6 @@ void export_Sanitizer(); void export_Conop(); void export_RingFinder(); void export_AminoAcids(); -void export_NonStandard(); void export_processor(); void export_rule_based(); void export_heuristic(); @@ -41,6 +40,5 @@ BOOST_PYTHON_MODULE(_ost_conop) export_Compound(); export_RingFinder(); export_AminoAcids(); - export_NonStandard(); export_diag(); } diff --git a/modules/conop/src/CMakeLists.txt b/modules/conop/src/CMakeLists.txt index 0185618ea4a9ebb02d917393670ce3f401ed494c..c2f1f4c6d0148c62461be5852bd33872ad39f903 100644 --- a/modules/conop/src/CMakeLists.txt +++ b/modules/conop/src/CMakeLists.txt @@ -9,7 +9,6 @@ compound.hh compound_lib.hh module_config.hh rule_based.hh -nonstandard.hh minimal_compound_lib.hh compound_lib_base.hh ring_finder.hh @@ -27,12 +26,11 @@ rule_based.cc model_check.cc compound.cc compound_lib.cc -nonstandard.cc ring_finder.cc ) module(NAME conop SOURCES ${OST_CONOP_SOURCES} - HEADERS ${OST_CONOP_HEADERS} DEPENDS_ON ost_mol ost_mol_alg ost_geom ost_db) + HEADERS ${OST_CONOP_HEADERS} DEPENDS_ON ost_mol ost_geom ost_db) if (WIN32) diff --git a/modules/conop/src/compound_lib.cc 
b/modules/conop/src/compound_lib.cc index e2cf22263aa88071b425f686cd0d7a9666ef112c..3509db72e85d6c81db125ee910931ed7b435eea5 100644 --- a/modules/conop/src/compound_lib.cc +++ b/modules/conop/src/compound_lib.cc @@ -361,17 +361,20 @@ CompoundLibPtr CompoundLib::Load(const String& database, bool readonly) static_cast<int>(aq.length()), &stmt, NULL); lib->chem_type_available_ = retval==SQLITE_OK; + sqlite3_finalize(stmt); aq="SELECT name FROM chem_compounds LIMIT 1"; retval=sqlite3_prepare_v2(lib->conn_, aq.c_str(), static_cast<int>(aq.length()), &stmt, NULL); lib->name_available_ = retval==SQLITE_OK; + sqlite3_finalize(stmt); // check if InChIs are available aq="SELECT inchi_code FROM chem_compounds LIMIT 1"; retval=sqlite3_prepare_v2(lib->conn_, aq.c_str(), static_cast<int>(aq.length()), &stmt, NULL); lib->inchi_available_ = retval==SQLITE_OK; + sqlite3_finalize(stmt); lib->creation_date_ = lib->GetCreationDate(); lib->ost_version_used_ = lib->GetOSTVersionUsed(); @@ -428,7 +431,7 @@ void CompoundLib::LoadBondsFromDB(CompoundPtr comp, int pk) const { } else { LOG_ERROR(sqlite3_errmsg(conn_)); } - sqlite3_finalize(stmt); + sqlite3_finalize(stmt); } CompoundPtr CompoundLib::FindCompound(const String& id, @@ -522,7 +525,11 @@ CompoundLib::CompoundLib(): CompoundLib::~CompoundLib() { if (conn_) { - sqlite3_close(conn_); + int retval = sqlite3_close(conn_); + if (retval != SQLITE_OK) { + LOG_ERROR("Problem while closing SQLite db for CompoundLib: " + << sqlite3_errmsg(conn_)); + } } } }} diff --git a/modules/conop/src/rule_based.cc b/modules/conop/src/rule_based.cc index 6e8b712fe63fdff02fec5836fd39316583dacc35..fe2013e254b80f5b4ff5b52474ff672873adb72c 100644 --- a/modules/conop/src/rule_based.cc +++ b/modules/conop/src/rule_based.cc @@ -19,6 +19,7 @@ #include <limits> #include <ost/log.hh> #include <ost/profile.hh> +#include <ost/message.hh> #include <ost/mol/xcs_editor.hh> #include <ost/mol/bond_handle.hh> #include <ost/mol/torsion_handle.hh> @@ -30,8 +31,6 @@ 
namespace ost { namespace conop { - - void RuleBasedProcessor::DoProcess(DiagnosticsPtr diags, mol::EntityHandle ent) const { @@ -199,6 +198,11 @@ String RuleBasedProcessor::ToString() const { return ss.str(); } - +void RuleBasedProcessor::_CheckLib() const { + if (!lib_) { + throw Error("Cannot initialize RuleBasedProcessor without a valid " + "CompoundLib object!"); + } +} }} diff --git a/modules/conop/src/rule_based.hh b/modules/conop/src/rule_based.hh index 3b91771a29c027f9cbd503c3927f3a13b58add34..719ba78030003cae44dfa881d8339757b3008102 100644 --- a/modules/conop/src/rule_based.hh +++ b/modules/conop/src/rule_based.hh @@ -40,13 +40,19 @@ public: lib_(compound_lib), fix_element_(true), strict_hydrogens_(false), unk_res_treatment_(CONOP_WARN), unk_atom_treatment_(CONOP_WARN) { + _CheckLib(); } - RuleBasedProcessor(CompoundLibPtr compound_lib, bool fe, bool sh, ConopAction ur, - ConopAction ua, bool bf, bool at, bool cn, bool aa, ConopAction zo): + RuleBasedProcessor(CompoundLibPtr compound_lib, bool fe, bool sh, + ConopAction ur, ConopAction ua, bool bf, bool at, bool cn, + bool aa, ConopAction zo): Processor(bf, at, cn, aa, zo), lib_(compound_lib), fix_element_(fe), strict_hydrogens_(sh), unk_res_treatment_(ur), - unk_atom_treatment_(ua) {} + unk_atom_treatment_(ua) + { + _CheckLib(); + } + ConopAction GetUnkResidueTreatment() const { return unk_res_treatment_; } @@ -91,6 +97,8 @@ protected: virtual void DoProcess(DiagnosticsPtr diags, mol::EntityHandle ent) const; private: + void _CheckLib() const; + CompoundLibPtr lib_; bool fix_element_; bool strict_hydrogens_; diff --git a/modules/conop/tests/CMakeLists.txt b/modules/conop/tests/CMakeLists.txt index 96400650977655f3efed4108de874bf9e1168bee..44fd58acfc7036926e3e5b31ebe51db7129291c8 100644 --- a/modules/conop/tests/CMakeLists.txt +++ b/modules/conop/tests/CMakeLists.txt @@ -9,8 +9,7 @@ set(OST_CONOP_UNIT_TESTS if (COMPOUND_LIB) list(APPEND OST_CONOP_UNIT_TESTS test_compound.py - test_cleanup.py - 
test_nonstandard.py) + test_cleanup.py) endif() ost_unittest(MODULE conop diff --git a/modules/conop/tests/test_rule_based_conop.cc b/modules/conop/tests/test_rule_based_conop.cc index 6444695d9f85cece9e06c9b4c6153ec8645963ba..1ce4ebe2436ff5f3d148a6272d3cb7b73cefb481 100644 --- a/modules/conop/tests/test_rule_based_conop.cc +++ b/modules/conop/tests/test_rule_based_conop.cc @@ -50,9 +50,24 @@ CompoundLibPtr load_lib() BOOST_AUTO_TEST_SUITE(conop); +BOOST_AUTO_TEST_CASE(rule_based_init_check) +{ + CompoundLibPtr lib; // null ptr is return value when library loading failed + BOOST_CHECK_THROW(RuleBasedProcessor rbc1(lib), ost::Error); + BOOST_CHECK_THROW(RuleBasedProcessor rbc2(lib, true, false, CONOP_WARN, + CONOP_WARN, false, true, true, true, + CONOP_WARN), ost::Error); + lib = load_lib(); + if (!lib) { return; } + BOOST_CHECK_NO_THROW(RuleBasedProcessor rbc3(lib)); + BOOST_CHECK_NO_THROW(RuleBasedProcessor rbc4(lib, true, false, CONOP_WARN, + CONOP_WARN, false, true, true, + true, CONOP_WARN)); +} + BOOST_AUTO_TEST_CASE(rule_based_set_get_flags) { - CompoundLibPtr lib=load_lib(); + CompoundLibPtr lib = load_lib(); if (!lib) { return; } RuleBasedProcessor rbc(lib); // check the defaults @@ -76,7 +91,7 @@ BOOST_AUTO_TEST_CASE(rule_based_set_get_flags) BOOST_AUTO_TEST_CASE(rule_based_connect) { - CompoundLibPtr lib=load_lib(); + CompoundLibPtr lib = load_lib(); if (!lib) { return; } RuleBasedProcessor rbc(lib); EntityHandle ent = CreateEntity(); @@ -94,7 +109,7 @@ BOOST_AUTO_TEST_CASE(rule_based_connect) BOOST_AUTO_TEST_CASE(rule_based_unk_atoms) { - CompoundLibPtr lib = load_lib(); + CompoundLibPtr lib = load_lib(); if (!lib) { return; } RuleBasedProcessor rbc(lib); EntityHandle ent = CreateEntity(); @@ -125,7 +140,7 @@ BOOST_AUTO_TEST_CASE(rule_based_unk_atoms) BOOST_AUTO_TEST_CASE(guesses_elements_of_unknown_atoms) { - CompoundLibPtr lib = load_lib(); + CompoundLibPtr lib = load_lib(); if (!lib) { return; } RuleBasedProcessor rbc(lib); EntityHandle ent = 
CreateEntity(); @@ -142,7 +157,7 @@ BOOST_AUTO_TEST_CASE(guesses_elements_of_unknown_atoms) BOOST_AUTO_TEST_CASE(fills_properties_of_unknown_residues) { - CompoundLibPtr lib = load_lib(); + CompoundLibPtr lib = load_lib(); if (!lib) { return; } RuleBasedProcessor rbc(lib); EntityHandle ent = CreateEntity(); @@ -160,7 +175,7 @@ BOOST_AUTO_TEST_CASE(fills_properties_of_unknown_residues) BOOST_AUTO_TEST_CASE(connects_atoms_of_unknown_residues_based_on_distance) { - CompoundLibPtr lib = load_lib(); + CompoundLibPtr lib = load_lib(); if (!lib) { return; } RuleBasedProcessor rbc(lib); EntityHandle ent = CreateEntity(); @@ -177,7 +192,7 @@ BOOST_AUTO_TEST_CASE(connects_atoms_of_unknown_residues_based_on_distance) BOOST_AUTO_TEST_CASE(rule_based_unk_res) { - CompoundLibPtr lib = load_lib(); + CompoundLibPtr lib = load_lib(); if (!lib) { return; } RuleBasedProcessor rbc(lib); EntityHandle ent = CreateEntity(); diff --git a/modules/doc/actions.rst b/modules/doc/actions.rst new file mode 100644 index 0000000000000000000000000000000000000000..4ff4afc6aabf48fc88e8fb75ec9a717e51a19f5a --- /dev/null +++ b/modules/doc/actions.rst @@ -0,0 +1,391 @@ +.. ost-actions: + +OST Actions +================================================================================ + +A pure command line interface of OST is provided by actions. +You can execute ``ost -h`` for a list of possible actions and for every action, +you can type ``ost <ACTION> -h`` to get a description on its usage. + +Here we list the most prominent actions with simple examples. + +.. ost-compare-structures: + +Comparing two structures +-------------------------------------------------------------------------------- + +You can compare two structures in terms of quaternary structure score and +lDDT scores between two complexes from the command line with: + +.. 
code-block:: console + + $ ost compare-structures [-h] [-v VERBOSITY] -m MODEL -r REFERENCE + [-o OUTPUT] [-d] [-ds DUMP_SUFFIX] + [-rs REFERENCE_SELECTION] [-ms MODEL_SELECTION] + [-ca] [-ft] [-cl COMPOUND_LIBRARY] [-qs] + [-c CHAIN_MAPPING [CHAIN_MAPPING ...]] [-rna] + [-l] [-ir INCLUSION_RADIUS] + [-ss SEQUENCE_SEPARATION] [-spr] [-ml] + [-rm REMOVE [REMOVE ...]] [-ce] [-mn] [-sc] + [-p PARAMETER_FILE] [-bt BOND_TOLERANCE] + [-at ANGLE_TOLERANCE] [-cc] + +By default the verbosity is set to 3 which will result in the information +being shown in the console. The result can be (optionally) saved as a JSON file +which is the preferred way of parsing it as the log output might change in the +future. Optionally, the local scores for lDDT can also be dumped to the output +file. Additionally, cleaned up structures can be saved to the disk. +The output file has the following format: + +.. code-block:: none + + { + "result": { + "<MODEL NAME>": { # Model name extracted from the file name + "<REFERENCE NAME>": { # Reference name extracted from the file name + "info": { + "residue_names_consistent": <Are the residue names consistent? true or false>, + "mapping": { + "chain_mapping": <Mapping of chains e.g.
{"A": "B", "B": "A"}>, + "chain_mapping_scheme": <Scheme used to get mapping, check mapping manually if "permissive" or "extensive">, + "alignments": <list of chain-chain alignments in FASTA format> + } + }, + "lddt": { # calculated when --lddt (-l) option is selected + "oligo_lddt": { + "status": <SUCCESS or FAILURE>, + "error": <ERROR message if any>, + "global_score": <calculated oligomeric lddt score>, + }, + "weighted_lddt": { + "status": <SUCCESS or FAILURE>, + "error": <ERROR message if any>, + "global_score": <calculated weighted lddt score>, + }, + "single_chain_lddt": [ # a list of chain-chain lDDts + { + "status": <SUCCESS or FAILURE>, + "error": <ERROR message if any>, + "reference_chain": <name of the chain in reference>, + "model_chain": <name of the chain in model>, + "global_score": <calculated single-chain lddt score>, + "conserved_contacts": <number of conserved contacts between model and reference>, + "total_contacts": <total number of contacts between model and reference>, + "per_residue_scores": [ # per-residue lDDT scores - calculated when --save-per-residue-scores (-spr) option is selected + { + "total_contacts": <total number of contacts between model and reference>, + "residue_name": <three letter code of the residue in reference chain>, + "lddt": <residue lDDT score>, + "conserved_contacts": <number of conserved contacts between model and reference for given residue>, + "residue_number": <residue number in reference chain> + }, + . + . + . + ] + } + ] + }, + "qs_score": { # calculated when --qs-score (-qs) option is selected + "status": <SUCCESS or FAILURE>, + "error": <ERROR message if any>, + "global_score": <Global QS-score>, + "best_score": <Best QS-score>, + } + + } + } + }, + "options": {} # Options used to run the script + } + +The "result" field is a dictionary mapping from model to reference as e.g. in +an mmCIF file there can be many entities and the script will compare all +combinations. + +Example usage: + +.. 
code-block:: console + + $ curl https://www.cameo3d.org/static/data/modeling/2018.03.03/5X7J_B/bu_target_01.pdb > reference.pdb + $ curl https://www.cameo3d.org/static/data/modeling/2018.03.03/5X7J_B/servers/server11/oligo_model-1/superposed_oligo_model-1.pdb > model.pdb + $ $OST_ROOT/bin/ost compare-structures \ + --model model.pdb --reference reference.pdb --output output.json \ + --qs-score --residue-number-alignment --lddt --structural-checks \ + --consistency-checks --inclusion-radius 15.0 --bond-tolerance 15.0 \ + --angle-tolerance 15.0 --molck --remove oxt hyd unk \ + --clean-element-column --map-nonstandard-residues + + ################################################################################ + Reading input files (fault_tolerant=False) + --> reading model from model.pdb + imported 2 chains, 396 residues, 3106 atoms; with 0 helices and 0 strands + --> reading reference from reference.pdb + imported 3 chains, 408 residues, 3011 atoms; with 0 helices and 0 strands + ################################################################################ + Cleaning up input with Molck + removing hydrogen atoms + --> removed 0 hydrogen atoms + removing OXT atoms + --> removed 0 OXT atoms + residue A.GLN54 is missing 4 atoms: 'CG', 'CD', 'OE1', 'NE2' + residue A.GLU55 is missing 4 atoms: 'CG', 'CD', 'OE1', 'OE2' + residue A.ARG139 is missing 6 atoms: 'CG', 'CD', 'NE', 'CZ', 'NH1', 'NH2' + residue B.THR53 is missing 1 atom: 'CG2' + residue B.GLN54 is missing 4 atoms: 'CG', 'CD', 'OE1', 'NE2' + residue B.GLU55 is missing 4 atoms: 'CG', 'CD', 'OE1', 'OE2' + residue B.GLU61 is missing 1 atom: 'OE2' + residue B.GLU117 is missing 1 atom: 'O' + residue B.ARG120 is missing 2 atoms: 'NH1', 'NH2' + residue B.ARG142 is missing 2 atoms: 'NH1', 'NH2' + residue B.GLU148 is missing 4 atoms: 'CG', 'CD', 'OE1', 'OE2' + residue B.PRO198 is missing 1 atom: 'O' + _.CL1 is not a standard amino acid + _.CL2 is not a standard amino acid + _.CL3 is not a standard amino acid + _.CL4 is 
not a standard amino acid + _.CA5 is not a standard amino acid + _.CA6 is not a standard amino acid + _.CA7 is not a standard amino acid + _.CA8 is not a standard amino acid + _.CA9 is not a standard amino acid + _.CL10 is not a standard amino acid + _.CL11 is not a standard amino acid + _.CL12 is not a standard amino acid + _.CL13 is not a standard amino acid + _.CL14 is not a standard amino acid + _.CL15 is not a standard amino acid + _.CA16 is not a standard amino acid + _.CA17 is not a standard amino acid + _.CA18 is not a standard amino acid + _.CA19 is not a standard amino acid + _.CA20 is not a standard amino acid + _.EDO21 is not a standard amino acid + _.EDO22 is not a standard amino acid + _.EDO23 is not a standard amino acid + _.EDO24 is not a standard amino acid + removing hydrogen atoms + --> removed 0 hydrogen atoms + removing OXT atoms + --> removed 0 OXT atoms + ################################################################################ + Performing structural checks + --> for reference(s) + Checking reference.pdb + Checking stereo-chemistry + Average Z-Score for bond lengths: 0.13694 + Bonds outside of tolerance range: 0 out of 2654 + Bond Avg Length Avg zscore Num Bonds + C-C 1.50876 0.09299 1501 + C-N 1.42978 0.17690 635 + C-O 1.25079 0.21528 518 + Average Z-Score angle widths: 0.07562 + Angles outside of tolerance range: 0 out of 2941 + Filtering non-bonded clashes + 0 non-bonded short-range distances shorter than tolerance distance + Distances shorter than tolerance are on average shorter by: 0.00000 + --> for model(s) + Checking model.pdb + Checking stereo-chemistry + Average Z-Score for bond lengths: -0.22524 + Bonds outside of tolerance range: 0 out of 2774 + Bond Avg Length Avg zscore Num Bonds + C-C 1.50225 -0.20158 1558 + C-N 1.42294 -0.12261 666 + C-O 1.24232 -0.42115 546 + C-S 1.80215 0.20858 4 + Average Z-Score angle widths: -0.06767 + Angles outside of tolerance range: 0 out of 3079 + Filtering non-bonded clashes + 0 non-bonded 
short-range distances shorter than tolerance distance + Distances shorter than tolerance are on average shorter by: 0.00000 + ################################################################################ + Comparing model.pdb to reference.pdb + Chains in reference.pdb: AB + Chains in model.pdb: AB + Chemically equivalent chain-groups in reference.pdb: [['B', 'A']] + Chemically equivalent chain-groups in model.pdb: [['A', 'B']] + Chemical chain-groups mapping: {('B', 'A'): ('A', 'B')} + Identifying Symmetry Groups... + Symmetry threshold 0.1 used for angles of reference.pdb + Symmetry threshold 0.1 used for axis of reference.pdb + Symmetry threshold 0.1 used for angles of model.pdb + Symmetry threshold 0.1 used for axis of model.pdb + Selecting Symmetry Groups... + Symmetry-groups used in reference.pdb: [('B',), ('A',)] + Symmetry-groups used in model.pdb: [('A',), ('B',)] + Closed Symmetry with strict parameters + Mapping found: {'A': 'B', 'B': 'A'} + -------------------------------------------------------------------------------- + Checking consistency between model.pdb and reference.pdb + Consistency check: OK + -------------------------------------------------------------------------------- + Computing QS-score + QSscore reference.pdb, model.pdb: best: 0.90, global: 0.90 + -------------------------------------------------------------------------------- + Computing lDDT scores + lDDT settings: + Inclusion Radius: 15 + Sequence separation: 0 + Cutoffs: 0.5, 1, 2, 4 + Residue properties label: lddt + === + --> Computing lDDT between model chain B and reference chain A + Coverage: 1 (187 out of 187 residues) + Global LDDT score: 0.8257 + (877834 conserved distances out of 1063080 checked, over 4 thresholds) + --> Computing lDDT between model chain A and reference chain B + Coverage: 1 (197 out of 197 residues) + Global LDDT score: 0.7854 + (904568 conserved distances out of 1151664 checked, over 4 thresholds) + --> Computing oligomeric lDDT score + Reference 
reference.pdb has: 2 chains + Model model.pdb has: 2 chains + Coverage: 1 (384 out of 384 residues) + Oligo lDDT score: 0.8025 + --> Computing weighted lDDT score + Weighted lDDT score: 0.8048 + ################################################################################ + Saving output into output.json + + +This reads the model and reference file and calculates QS-score between them. +In the example above the output file looks as follows: + +.. code-block:: python + + { + "result": { + "model.pdb": { + "reference.pdb": { + "info": { + "residue_names_consistent": true, + "mapping": { + "chain_mapping": { + "A": "B", + "B": "A" + }, + "chain_mapping_scheme": "strict", + "alignments": [ + ">reference:A\n-PGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSL---QELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVR-------LGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP\n>model:B\nMPGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSLLLTQELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVRRPDRLEGLGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP", + ">reference:B\n-PGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSLLLTQELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVRRPDRLEGLGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP\n>model:A\nMPGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSLLLTQELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVRRPDRLEGLGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP" + ] + } + }, + "lddt": { + "oligo_lddt": { + "status": "SUCCESS", + "global_score": 0.8025223275721413, + "error": "" + }, + "weighted_lddt": { + "status": "SUCCESS", + "global_score": 0.804789180710712, + "error": "" + }, + "single_chain_lddt": [ + { + "status": "SUCCESS", + "global_score": 0.8257459402084351, + "conserved_contacts": 877834, + "reference_chain": "A", + "total_contacts": 1063080, + "error": "", + "model_chain": "B" + }, + { + 
"status": "SUCCESS", + "global_score": 0.7854443788528442, + "conserved_contacts": 904568, + "reference_chain": "B", + "total_contacts": 1151664, + "error": "", + "model_chain": "A" + } + ] + }, + "qs_score": { + "status": "SUCCESS", + "global_score": 0.8974384796108209, + "best_score": 0.9022811630070536, + "error": "" + } + } + } + }, + "options": { + "reference": "reference.pdb", + "structural_checks": true, + "chain_mapping": null, + "bond_tolerance": 15.0, + "parameter_file": "Path to stage/share/openstructure/stereo_chemical_props.txt", + "consistency_checks": true, + "qs_score": true, + "map_nonstandard_residues": true, + "save_per_residue_scores": false, + "fault_tolerant": false, + "reference_selection": "", + "qs_rmsd": false, + "cwd": "CWD", + "inclusion_radius": 15.0, + "angle_tolerance": 15.0, + "c_alpha_only": false, + "clean_element_column": true, + "dump_suffix": ".compare.structures.pdb", + "compound_library": "Path to stage/share/openstructure/compounds.chemlib", + "dump_structures": false, + "residue_number_alignment": true, + "verbosity": 3, + "remove": [ + "oxt", + "hyd", + "unk" + ], + "molck": true, + "sequence_separation": 0, + "output": "output.json", + "model": "model.pdb", + "lddt": true, + "model_selection": "" + } + } + +If all the structures are clean one can omit all the checking steps and +calculate eg. QS-score directly: + +.. 
code:: console + + $ $OST_ROOT/bin/ost compare-structures --model model.pdb --reference reference.pdb --output output_qs.json --qs-score --residue-number-alignment + + ################################################################################ + Reading input files (fault_tolerant=False) + --> reading model from model.pdb + imported 2 chains, 396 residues, 3106 atoms; with 0 helices and 0 strands + --> reading reference from reference.pdb + imported 3 chains, 408 residues, 3011 atoms; with 0 helices and 0 strands + ################################################################################ + Comparing model.pdb to reference.pdb + Chains in reference.pdb: AB + Chains in model.pdb: AB + Chemically equivalent chain-groups in reference.pdb: [['B', 'A']] + Chemically equivalent chain-groups in model.pdb: [['A', 'B']] + Chemical chain-groups mapping: {('B', 'A'): ('A', 'B')} + Identifying Symmetry Groups... + Symmetry threshold 0.1 used for angles of reference.pdb + Symmetry threshold 0.1 used for axis of reference.pdb + Symmetry threshold 0.1 used for angles of model.pdb + Symmetry threshold 0.1 used for axis of model.pdb + Selecting Symmetry Groups... 
+ Symmetry-groups used in reference.pdb: [('B',), ('A',)] + Symmetry-groups used in model.pdb: [('A',), ('B',)] + Closed Symmetry with strict parameters + Mapping found: {'A': 'B', 'B': 'A'} + -------------------------------------------------------------------------------- + Checking consistency between model.pdb and reference.pdb + Consistency check: OK + -------------------------------------------------------------------------------- + Computing QS-score + QSscore reference.pdb, model.pdb: best: 0.90, global: 0.90 + ################################################################################ + Saving output into output_qs.json + diff --git a/modules/doc/contributing.rst b/modules/doc/contributing.rst index c87c2daa0dc39444490415d69b489f9c1d5dbdcb..21f56abca9215334d2379e0811954735a3803b17 100644 --- a/modules/doc/contributing.rst +++ b/modules/doc/contributing.rst @@ -109,3 +109,74 @@ If you got a patch from someone else and would like to use apply it to your repo .. code-block:: bash git am < changeset.diff + +Starting Your Own Action +-------------------------------------------------------------------------------- +In OST we call scripts/programs 'actions'. They are started by a +launcher found in your staging directory at :file:`stage/bin/ost`. This little +guy helps keep the shell environment in the right mood to carry out your +job. So usually you will start an action by + +.. code-block:: console + + $ stage/bin/ost --help + +When starting a new action, **do** go for a dedicated branch for action-development. +There you can produce intermediate commits while other branches stay clean in +case you have to do some work there which needs to get public. + +After preparing your repository it's time to create a file for the action. That +is a bit different than for modules. Assuming we are sitting in the +repository's root: + +.. 
code-block:: console + + $ touch actions/ost-awesome-action + $ chmod +x actions/ost-awesome-action + +Two things are important here: actions are prefixed with :file:`ost-`, so they +are recognised by the :file:`ost` launcher. Secondly, action files need to be +executable, which does not propagate if you do it **after** the first call to +``make``. + +To get the new action recognised by ``make`` to be placed in +:file:`stage/libexec/openstructure`, it has to be registered with ``cmake`` in +:file:`actions/CMakeLists.txt`: + +.. code-block:: cmake + :linenos: + + add_custom_target(actions ALL) + + ost_action_init() + ost_action(ost-awesome-action actions) + +Just add your action with its full filename with a call to `ost_action` at +the end of the file. + +Now it's time to fill your action with code. Instead of reading a lot more +explanations, it should be easy to go by examples from the :file:`actions` +directory. There are only two really important points: + +* No shebang line (``#! /usr/bin/python``) in your action! Also no + ``#! /usr/bin/env python`` or anything like this. This may lead to funny side + effects, like calling a ``python`` interpreter from outside a virtual + environment or calling a different version. Basically it may mess up the + environment your action is running in. Actions are called by :file:`ost`, + that's enough to get everything just right. + +* The code of your action belongs in the :attr:`__main__` branch of the script. + Your action will have its own function definitions, variables and all the bells + and whistles. Hiding behind :attr:`__main__` keeps everything separated and + makes things easier when it gets to debugging. So just after + + .. code-block:: python + + import alot + + def functions_specific_to_your_action(...): + + if __name__ == "__main__": + <put together what your action should do here> + + start putting your action together. 
diff --git a/modules/doc/install.rst b/modules/doc/install.rst index 585dafe3cfd87a45afcd88c6cccec4213a83a125..b6664cfb40580cca4d28a2271ac3cd34af8c05d7 100644 --- a/modules/doc/install.rst +++ b/modules/doc/install.rst @@ -4,14 +4,22 @@ Installing OpenStructure From Source Brief Overview -------------------------------------------------------------------------------- -Compiling OpenStructure consists of several steps that are described below in -more detail. In essence, these steps are: +For a simple and portable way to use OpenStructure we recommend using a +container solution. We provide recipes to build images for +`Docker <https://www.docker.com/>`_ and +`Singularity <https://www.sylabs.io/guides/2.5.1/user-guide>`_. +The latest recipes and instructions can be found on our GitLab site +(`Docker instructions <https://git.scicore.unibas.ch/schwede/openstructure/tree/develop/docker>`_ and +`Singularity instructions <https://git.scicore.unibas.ch/schwede/openstructure/tree/develop/singularity>`_). + +If you wish to compile OpenStructure outside of a container, you need to follow +the steps which we describe in detail below. In essence, these steps are: * Installing the Dependencies * Checking out the source code from GIT * Configuring the build with cmake * Compiling an Linking - + Installing the Dependencies -------------------------------------------------------------------------------- @@ -299,16 +307,8 @@ from source. On some Linux distributions, there are issues with Qt4 and hence it may not be possible to build OpenStructure with GUI support at all. This is for instance -known to be an issue with boost versions >= 1.62. - -An additional problem arises for gcc versions >= 6. There an extra flag is -required to use the C++98 standard: - -.. code-block:: bash - - cmake . -DOPTIMIZE=ON -DENABLE_INFO=OFF -DCMAKE_CXX_FLAGS='-std=c++98' - -We hope to support Qt5 and C++11 in the next OpenStructure release. +known to be an issue with boost versions >= 1.62. 
We hope to support Qt5 in the +next OpenStructure release. **Ubuntu 16.04 with GUI** @@ -342,12 +342,11 @@ All the dependencies can be installed from the package manager as follows: sudo dnf install cmake eigen3-devel boost-devel libpng-devel python2-devel \ fftw-devel libtiff-devel -Fedora 26 has gcc 7 and boost 1.63 by default. Hence, we will need to disable -Qt4, the GUI and add the extra flag described above: +Here, we compile a version without GUI as follows: .. code-block:: bash - cmake . -DOPTIMIZE=ON -DENABLE_INFO=OFF -DCMAKE_CXX_FLAGS='-std=c++98' + cmake . -DOPTIMIZE=ON -DENABLE_INFO=OFF **macOS with Homebrew without GUI** diff --git a/modules/gfx/src/scene.cc b/modules/gfx/src/scene.cc index 5c1e9cbb7149115b219ceccc8e88698fa5e70e97..98b47c9cbb16ed2477cb5fb0fd001bcaee8f0c57 100644 --- a/modules/gfx/src/scene.cc +++ b/modules/gfx/src/scene.cc @@ -1097,7 +1097,7 @@ bool Scene::HasNode(const String& name) const { FindNode fn(name); this->Apply(fn); - return fn.node; + return static_cast<bool>(fn.node); } void Scene::Apply(const InputEvent& e, bool request_redraw) diff --git a/modules/gfx/src/texture.hh b/modules/gfx/src/texture.hh index aa3fec45f8f0a5d288d977dd290e40210a9c5253..cf25b9d4f83a8c86d442d4b82db0ac0346d8b433 100644 --- a/modules/gfx/src/texture.hh +++ b/modules/gfx/src/texture.hh @@ -51,7 +51,7 @@ public: Texture(const Bitmap& b); - bool IsValid() const {return d_;} + bool IsValid() const {return static_cast<bool>(d_);} float* data() {return &d_[0];} diff --git a/modules/gfx/src/vertex_array.cc b/modules/gfx/src/vertex_array.cc index 520e0d0e9c95f640c3578b99af95090e7f4aabec..847e38d14a0bcc1163ffcb383f9d7ce179a65353 100644 --- a/modules/gfx/src/vertex_array.cc +++ b/modules/gfx/src/vertex_array.cc @@ -1362,7 +1362,7 @@ void IndexedVertexArray::draw_aalines() {e3[0],e3[1],e3[2]}, {ve0.c[0],ve0.c[1],ve0.c[2],ve0.c[3]}, {ve1.c[0],ve1.c[1],ve1.c[2],ve1.c[3]}, - -0.5*(q0[2]+q1[2])}; + -0.5f*(q0[2]+q1[2])}; line_list.push_back(le); } diff --git 
a/modules/gui/src/data_viewer/data_viewer.hh b/modules/gui/src/data_viewer/data_viewer.hh index ca6893edc34895fde6df19e2b1344ea11affcb98..c1f18847ec36c270de0d93183342c8427dd604a1 100644 --- a/modules/gui/src/data_viewer/data_viewer.hh +++ b/modules/gui/src/data_viewer/data_viewer.hh @@ -25,6 +25,8 @@ #ifndef IMG_GUI_DATA_VIEWER_H #define IMG_GUI_DATA_VIEWER_H +#ifndef Q_MOC_RUN + #include <map> #include <ost/base.hh> @@ -42,6 +44,9 @@ #include "fft_panel.hh" #include <ost/gui/module_config.hh> +#endif + + //fw decl class QLabel; diff --git a/modules/gui/src/data_viewer/data_viewer_panel_base.hh b/modules/gui/src/data_viewer/data_viewer_panel_base.hh index 699efd372cad4dc5b0a9f26a205b4487f5b33168..be3ac007ec7550c4242e8cbc607654c5eca12fea 100644 --- a/modules/gui/src/data_viewer/data_viewer_panel_base.hh +++ b/modules/gui/src/data_viewer/data_viewer_panel_base.hh @@ -26,10 +26,10 @@ #define DATA_VIEWER_PANEL_BASE_HH_ #include <map> -#include <boost/shared_ptr.hpp> - +#ifndef Q_MOC_RUN +#include <boost/shared_ptr.hpp> #include <ost/base.hh> #include <ost/img/extent.hh> #include <ost/img/data_observer.hh> @@ -41,6 +41,8 @@ #include <ost/img/normalizer_impl.hh> +#endif + #include <QWidget> #include <QCursor> #include <QMenu> diff --git a/modules/gui/src/data_viewer/fft_panel.hh b/modules/gui/src/data_viewer/fft_panel.hh index 75a30fed87e8a1018887d10a56fd7e0465674ce0..ac0df0f56e39d148ab16735fe6e7fe3ce082244e 100644 --- a/modules/gui/src/data_viewer/fft_panel.hh +++ b/modules/gui/src/data_viewer/fft_panel.hh @@ -24,10 +24,14 @@ Author: Andreas Schenk */ +#ifndef Q_MOC_RUN + #include <ost/gui/module_config.hh> #include <ost/img/data_observer.hh> #include "data_viewer_panel_base.hh" +#endif + namespace ost { namespace img { namespace gui { class ParentDataObserver: public DataObserver diff --git a/modules/gui/src/file_type_dialog.hh b/modules/gui/src/file_type_dialog.hh index f1fe6ce5e5fc6e856b15c091b65d03e41309bda2..270d35131d96f77ebfee4de64420abe4f933590d 100644 --- 
a/modules/gui/src/file_type_dialog.hh +++ b/modules/gui/src/file_type_dialog.hh @@ -25,7 +25,7 @@ #include <ost/gui/module_config.hh> - +#ifndef Q_MOC_RUN #include <ost/io/mol/entity_io_handler.hh> #include <ost/io/seq/sequence_io_handler.hh> @@ -34,6 +34,8 @@ #include <ost/io/img/map_io_handler.hh> #endif +#endif + #include <QDialog> #include <QMetaType> diff --git a/modules/img/base/src/base.hh b/modules/img/base/src/base.hh index 0122a07371705cdc89808f6f3966ab67cb9e2e6a..f5d73f5f4f1aa18f897d6e4710d5108ae7380bbc 100644 --- a/modules/img/base/src/base.hh +++ b/modules/img/base/src/base.hh @@ -44,21 +44,11 @@ #pragma warning(disable:4231) #endif - #ifdef IRIX -inline Real round(Real x) {return rint(x);} -inline float roundf(float x) {return (float)rint((Real)x);} using std::cos; using std::sin; #endif -#ifndef round_function -#define round_function -#ifndef round -inline int round(Real x) {return floor(x+0.5);} -#endif -#endif - namespace ost { namespace img { // String is not always predefined diff --git a/modules/img/base/src/image_handle.cc b/modules/img/base/src/image_handle.cc index c0059be0b852490c5e50306fffab6fca18f23208..fe84239cd624c3c3e6584151ddff9ecf1dffca3d 100644 --- a/modules/img/base/src/image_handle.cc +++ b/modules/img/base/src/image_handle.cc @@ -153,7 +153,7 @@ void ImageHandle::Reset(const Extent &e, DataType type,DataDomain dom) bool ImageHandle::IsValid() const { - return impl_; + return static_cast<bool>(impl_); } long ImageHandle::MemSize() const diff --git a/modules/index.rst b/modules/index.rst index 4521313c8592ca3d57a7a91dcac7813c7a524316..9ab3fdd1ddeaa810a0cfddeaa3c28083c3f22a9c 100644 --- a/modules/index.rst +++ b/modules/index.rst @@ -30,6 +30,8 @@ OpenStructure documentation contributing table mol/alg/lddt + mol/alg/molck + actions For Starters -------------------------------------------------------------------------------- @@ -101,7 +103,11 @@ Varia **Users**: :doc:`Reporting a problem <users>` -**lDDT**: :doc:`lDDT command line 
executable<mol/alg/lddt>` +**lDDT**: :doc:`lDDT command line executable and Python API<mol/alg/lddt>` + +**Molck**: :doc:`Molecular Checker<mol/alg/molck>` + +**Actions**: :doc:`OST Actions<actions>` Extending OpenStructure -------------------------------------------------------------------------------- diff --git a/modules/info/src/info_handle.cc b/modules/info/src/info_handle.cc index 9b6ecbc49a7dac0caddd7347743f9dd634cdee2c..263ae222c9ed261b0de6061631dfb94e98753bee 100644 --- a/modules/info/src/info_handle.cc +++ b/modules/info/src/info_handle.cc @@ -74,7 +74,7 @@ void InfoHandle::Export(const String& file) const bool InfoHandle::IsValid() const { - return impl_; + return static_cast<bool>(impl_); } diff --git a/modules/io/doc/io.rst b/modules/io/doc/io.rst index f8f5302e060d360d13b2851d42dce12d3e9d2c66..ccff89f86ea08263adbed07e6bf45d598541594a 100644 --- a/modules/io/doc/io.rst +++ b/modules/io/doc/io.rst @@ -142,8 +142,8 @@ Loading sequence or alignment files myseq = io.LoadSequence('seq.fasta') # for obtaining a SequenceList seqlist = io.LoadSequenceList('seqs.fasta') - # or for multiple aligned fasta files use - aln = io.LoadAlignment('algnm.aln',format="clustal") + # or for multiple alignments (here from CLUSTAL) + aln = io.LoadAlignment('algnm.aln', format="clustal") For a list of file formats supported by :func:`LoadSequence` see :doc:`sequence_formats`. @@ -212,11 +212,11 @@ Saving Sequence Data .. code-block:: python # recognizes FASTA file by file extension - io.SaveSequence(myseq,'seq.fasta') + io.SaveSequence(myseq, 'seq.fasta') # for saving a SequenceList - io.SaveSequenceList(seqlist,'seqlist.fasta') - # or multiple aligned fasta files - io.SaveAlignment(aln,'algnm.aln',format="clustal") + io.SaveSequenceList(seqlist, 'seqlist.fasta') + # or for multiple alignments (here in FASTA format) + io.SaveAlignment(aln, 'aln.fasta') For a list of file formats supported by :func:`SaveSequence` see :doc:`sequence_formats`. 
@@ -343,3 +343,31 @@ Saving Density Maps 12 +Stereochemical Parameters +-------------------------------------------------------------------------------- + +In order to check the structure for stereo-chemical violations and steric clashes +before computing the lDDT scores it is required to pass a parameter file based on +Engh and Huber parameters, and on the atomic radii as defined in the Cambridge +Structural Database. OpenStructure ships with a default file called +`stereo_chemical_props.txt` located in the `$OST_ROOT/share/openstructure` +directory. A function :func:`~ost.io.ReadStereoChemicalPropsFile` is used to +read this file. + + + +.. function:: ReadStereoChemicalPropsFile(filename="", check=True) + + Read stereochemical parameters - if not provided a local version will be used. + + :param filename: The path to the parameter file that will be used. If set + to "", it reads the default file shipped with OpenStructure. + :type filename: :class:`str` + :param check: Raise an error when any of the resulting tables are empty. + :type check: :class:`bool` + :return: Object containing stereochemical parameters + :rtype: :class:`~ost.mol.alg.StereoChemicalProps` + +.. function:: GetStereoChemicalPropsFile() + + Get the default path to the stereochemical parameters file. diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index 730ca1b8e97ed554287158184c9d9eb20283da37..23dc32f461a08cf1633362e60bb3b9c54ecb09af 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -638,7 +638,7 @@ of the annotation available. .. method:: GetChainIntervalList() - See :attr:`chainintervalls` + See :attr:`chainintervals` .. 
method:: GetOperations() diff --git a/modules/io/pymod/wrap_io.cc b/modules/io/pymod/wrap_io.cc index 302720245b63d534d88aceb2bba9c4d64bba0464..55e0dd61c3c2a0646bc63c4e810cf05a3fc50343 100644 --- a/modules/io/pymod/wrap_io.cc +++ b/modules/io/pymod/wrap_io.cc @@ -33,6 +33,7 @@ using namespace boost::python; #include <ost/io/mol/entity_io_sdf_handler.hh> #include <ost/io/mol/pdb_reader.hh> #include <ost/io/mol/dcd_io.hh> +#include <ost/io/stereochemical_params_reader.hh> using namespace ost; using namespace ost::io; @@ -44,12 +45,6 @@ BOOST_PYTHON_FUNCTION_OVERLOADS(load_mentity_ov,LoadManagedEntity,2,3); BOOST_PYTHON_FUNCTION_OVERLOADS(load_surface_ov,LoadSurface,1,2); BOOST_PYTHON_FUNCTION_OVERLOADS(load_msurface_ov,LoadManagedSurface,2,3); -BOOST_PYTHON_FUNCTION_OVERLOADS(load_alignment_ov, - LoadAlignment, 1, 2) - -BOOST_PYTHON_FUNCTION_OVERLOADS(save_alignment_ov, - SaveAlignment, 2, 3) - void save_ent_view(const mol::EntityView& en, const String& filename, const String& format="auto") { @@ -67,6 +62,9 @@ BOOST_PYTHON_FUNCTION_OVERLOADS(save_entity_handle_ov, BOOST_PYTHON_FUNCTION_OVERLOADS(save_entity_view_ov, save_ent_view, 2, 3) +ost::mol::alg::StereoChemicalProps (*read_props_a)(String filename, bool check) = &ReadStereoChemicalPropsFile; +ost::mol::alg::StereoChemicalProps (*read_props_b)(bool check) = &ReadStereoChemicalPropsFile; + } void export_pdb_io(); @@ -91,25 +89,31 @@ BOOST_PYTHON_MODULE(_ost_io) save_entity_handle_ov(args("entity", "filename", "format"))); def("LoadAlignment", &LoadAlignment, - load_alignment_ov(args("filename", "format"))); + (arg("filename"), arg("format")="auto")); def("AlignmentFromString", &AlignmentFromString); def("AlignmentFromStream", &AlignmentFromStream); def("AlignmentToString", &AlignmentToString); - def("LoadSequenceList", &LoadSequenceList, arg("format")="auto"); - def("LoadSequence", &LoadSequence, arg("format")="auto"); + def("LoadSequenceList", &LoadSequenceList, + (arg("filename"), arg("format")="auto")); + 
def("LoadSequence", &LoadSequence, + (arg("filename"), arg("format")="auto")); def("SequenceListFromString", &SequenceListFromString); def("SequenceFromString", &SequenceFromString); - def("SaveAlignment", &SaveAlignment, arg("format")="auto"); + def("SaveAlignment", &SaveAlignment, + (arg("aln"), arg("filename"), arg("format")="auto")); - def("LoadSequenceProfile", &LoadSequenceProfile, arg("format")="auto"); + def("LoadSequenceProfile", &LoadSequenceProfile, + (arg("filename"), arg("format")="auto")); def("LoadSurface",LoadSurface,load_surface_ov()); def("LoadManagedSurface",LoadManagedSurface,load_msurface_ov()); def("SequenceToString", &SequenceToString); def("SequenceListToString", &SequenceListToString); - def("SaveSequenceList", &SaveSequenceList, arg("format")="auto"); - def("SaveSequence", &SaveSequence, arg("format")="auto"); + def("SaveSequenceList", &SaveSequenceList, + (arg("seq_list"), arg("filename"), arg("format")="auto")); + def("SaveSequence", &SaveSequence, + (arg("sequence"), arg("filename"), arg("format")="auto")); def("LoadSDF", &LoadSDF); def("LoadCRD", &LoadCRD); @@ -120,6 +124,12 @@ BOOST_PYTHON_MODULE(_ost_io) def("LoadMAE", &LoadMAE); def("LoadPQR", &LoadPQR); + def("ReadStereoChemicalPropsFile", read_props_a, + (arg("filename"), arg("check")=true)); + def("ReadStereoChemicalPropsFile", read_props_b, + (arg("check")=true)); + def("GetStereoChemicalPropsFile", &GetStereoChemicalPropsFile); + export_pdb_io(); export_mmcif_io(); #if OST_IMG_ENABLED diff --git a/modules/io/src/CMakeLists.txt b/modules/io/src/CMakeLists.txt index 13d6e3c6adef7c8adbe30aa0494d2668ce7242d7..41ac18cc63766a9674afc7e61fd4acc7542ea99d 100644 --- a/modules/io/src/CMakeLists.txt +++ b/modules/io/src/CMakeLists.txt @@ -65,7 +65,7 @@ if (ENABLE_IMG) endif() #################################### -set(OST_IO_DEPENDENCIES ost_base;ost_conop;ost_seq) +set(OST_IO_DEPENDENCIES ost_base;ost_conop;ost_seq;ost_mol_alg) if (ENABLE_IMG) set(OST_IO_DEPENDENCIES 
${OST_IO_DEPENDENCIES};ost_img;ost_img_alg) endif() diff --git a/modules/io/src/mol/CMakeLists.txt b/modules/io/src/mol/CMakeLists.txt index e0792e93ee64ec6038451cad5a283a5c5c12144c..c88d99db8690af8efafc44a896f1cd017d03ff19 100644 --- a/modules/io/src/mol/CMakeLists.txt +++ b/modules/io/src/mol/CMakeLists.txt @@ -20,6 +20,7 @@ star_parser.cc mmcif_reader.cc mmcif_info.cc pdb_str.cc +stereochemical_params_reader.cc PARENT_SCOPE ) @@ -47,5 +48,6 @@ surface_io_handler.hh load_surface.hh surface_io_msms_handler.hh pdb_str.hh +stereochemical_params_reader.hh PARENT_SCOPE ) diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 27a7c9bc8406308409525862a7b1d906417d2c36..b473c96ca4ba3e0416211a74498386b39f82142b 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -97,26 +97,12 @@ void MMCifReader::SetRestrictChains(const String& restrict_chains) restrict_chains_ = restrict_chains; } -bool MMCifReader::IsValidPDBIdent(const StringRef& pdbid) -{ - if (pdbid.length() == PDBID_LEN && isdigit(pdbid[0])) { - return true; - } - return false; -} - bool MMCifReader::OnBeginData(const StringRef& data_name) { LOG_DEBUG("MCIFFReader: " << profile_); Profile profile_import("MMCifReader::OnBeginData"); - // check for PDB id - if (!this->IsValidPDBIdent(data_name)) { - throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, - "No valid PDB id found for data block, read instead \'" - + data_name.str() + "\'", - this->GetCurrentLinenum())); - } + // IDs in mmCIF files can be any string, so no restrictions here this->ClearState(); @@ -1543,17 +1529,7 @@ void MMCifReader::AssignSecStructure(mol::EntityHandle ent) continue; } mol::SecStructure alpha(mol::SecStructure::ALPHA_HELIX); - // some PDB files contain helix/strand entries that are adjacent to each - // other. 
To avoid visual artifacts, we effectively shorten the first of - // the two secondary structure segments to insert one residue of coil - // conformation. - mol::ResNum start = i->start, end = i->end; - if (helix_list_.end() != i+1 && // unit test - (*(i+1)).start.GetNum() <= end.GetNum()+1 && - (*(i+1)).end.GetNum() > end.GetNum()) { - end = mol::ResNum((*(i+1)).start.GetNum()-2); - } - chain.AssignSecondaryStructure(alpha, start, end); + chain.AssignSecondaryStructure(alpha, i->start, i->end); } for (MMCifHSVector::const_iterator i=strand_list_.begin(), @@ -1565,14 +1541,7 @@ void MMCifReader::AssignSecStructure(mol::EntityHandle ent) continue; } mol::SecStructure extended(mol::SecStructure::EXTENDED); - mol::ResNum start = i->start, end = i->end; - // see comment for helix assignment - if (strand_list_.end() != i+1 && // unit test - (*(i+1)).start.GetNum() <= end.GetNum()+1 && - (*(i+1)).end.GetNum() > end.GetNum()) { - end=mol::ResNum((*(i+1)).start.GetNum()-2); - } - chain.AssignSecondaryStructure(extended, start, end); + chain.AssignSecondaryStructure(extended, i->start, i->end); } } diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index edd249f38755077ac6b4669cec838de3d69e04c0..163972b60160d565f188259b9b6056aed9606a03 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -187,13 +187,6 @@ protected: } } // tested - /// \brief Check a PDB id to be of length 4 and start with a digit - /// - /// \param pdbid putative PDB id - /// - /// \return true for a valid id, false otherwise - bool IsValidPDBIdent(const StringRef& pdbid); - /// \brief fetch values identifying atoms /// /// \param[in] columns data row @@ -353,8 +346,7 @@ protected: private: /// \enum magic numbers of this class typedef enum { - PDBID_LEN=4, ///< length of a PDB id - MAX_ITEMS_IN_ROW=18, ///< count for possible items in a loop row + MAX_ITEMS_IN_ROW=18 ///< count for possible items in a loop row } MMCifMagicNos; /// 
\enum items of the atom_site category diff --git a/modules/io/src/mol/pdb_reader.cc b/modules/io/src/mol/pdb_reader.cc index a203234ecc275f82bceb87b75781ad1e626fb100..a0a5f3b4f46555674024a8bf4b558f7e7fd733ed 100644 --- a/modules/io/src/mol/pdb_reader.cc +++ b/modules/io/src/mol/pdb_reader.cc @@ -520,16 +520,7 @@ void PDBReader::AssignSecStructure(mol::EntityHandle ent) continue; } mol::SecStructure alpha(mol::SecStructure::ALPHA_HELIX); - // some PDB files contain helix/strand entries that are adjacent to each - // other. To avoid visual artifacts, we effectively shorten the first of the - // two secondary structure segments to insert one residue of coil - // conformation. - mol::ResNum start=i->start, end=i->end; - if (helix_list_.end()!=i+1 && (*(i+1)).start.GetNum()<=end.GetNum()+1 && - (*(i+1)).end.GetNum()>end.GetNum()) { - end=mol::ResNum((*(i+1)).start.GetNum()-2); - } - chain.AssignSecondaryStructure(alpha, start, end); + chain.AssignSecondaryStructure(alpha, i->start, i->end); } for (HSList::const_iterator i=strand_list_.begin(), e=strand_list_.end(); @@ -540,13 +531,7 @@ void PDBReader::AssignSecStructure(mol::EntityHandle ent) continue; } mol::SecStructure extended(mol::SecStructure::EXTENDED); - mol::ResNum start=i->start, end=i->end; - // see comment for helix assignment - if (strand_list_.end()!=i+1 && (*(i+1)).start.GetNum()<=end.GetNum()+1 && - (*(i+1)).end.GetNum()>end.GetNum()) { - end=mol::ResNum((*(i+1)).start.GetNum()-2); - } - chain.AssignSecondaryStructure(extended, start, end); + chain.AssignSecondaryStructure(extended, i->start, i->end); } } @@ -644,7 +629,9 @@ void PDBReader::ParseAnisou(const StringRef& line, int line_num, } String aname(atom_name.str()); if (!curr_residue_.IsValid()) { - if (profile_.fault_tolerant || profile_.calpha_only) { + if (profile_.fault_tolerant || + profile_.calpha_only || + profile_.no_hetatms) { return; } const char* fmt_str="invalid ANISOU record for inexistent atom on line %d"; @@ -654,6 +641,7 @@ void 
PDBReader::ParseAnisou(const StringRef& line, int line_num, if (!atom.IsValid()) { if (profile_.fault_tolerant || profile_.calpha_only || + profile_.no_hetatms || warned_name_mismatch_) { return; } diff --git a/modules/io/src/mol/stereochemical_params_reader.cc b/modules/io/src/mol/stereochemical_params_reader.cc new file mode 100644 index 0000000000000000000000000000000000000000..0201caf573168a860ae10190cef51ec526238ed2 --- /dev/null +++ b/modules/io/src/mol/stereochemical_params_reader.cc @@ -0,0 +1,67 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. 
+// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#include <boost/filesystem/fstream.hpp> +#include <ost/platform.hh> +#include <ost/io/stereochemical_params_reader.hh> + + +namespace ost { namespace io { + +ost::mol::alg::StereoChemicalProps ReadStereoChemicalPropsFile(String filename, bool check){ + boost::filesystem::path loc(filename); + boost::filesystem::ifstream infile(loc); + if (!infile) { + std::stringstream serr; + serr << "Could not find parameter file '" << filename << "'"; + throw ost::Error(serr.str()); + } + std::vector<String> stereo_chemical_props; + String line; + while (std::getline(infile, line)) + { + std::stringstream line_stream(line); + stereo_chemical_props.push_back(line); + } + + ost::mol::alg::StereoChemicalParams bond_table; + ost::mol::alg::StereoChemicalParams angle_table; + ost::mol::alg::ClashingDistances nonbonded_table; + + bond_table = ost::mol::alg::FillStereoChemicalParams("Bond", stereo_chemical_props, check); + // Angles + angle_table = ost::mol::alg::FillStereoChemicalParams("Angle", stereo_chemical_props, check); + // Not bonded + nonbonded_table = ost::mol::alg::FillClashingDistances(stereo_chemical_props, check); + + return ost::mol::alg::StereoChemicalProps(bond_table, angle_table, nonbonded_table); +} + +ost::mol::alg::StereoChemicalProps ReadStereoChemicalPropsFile(bool check) { + String filename = GetStereoChemicalPropsFile(); + return ReadStereoChemicalPropsFile(filename, check); +} + +String GetStereoChemicalPropsFile() { + String filename; + filename = ost::GetSharedDataPath() + "/stereo_chemical_props.txt"; + return filename; +} + +}} // ns + diff --git a/modules/io/src/mol/stereochemical_params_reader.hh b/modules/io/src/mol/stereochemical_params_reader.hh 
new file mode 100644 index 0000000000000000000000000000000000000000..a5d33acce7888bdcd88b74ec0aecd38a120eb9d6 --- /dev/null +++ b/modules/io/src/mol/stereochemical_params_reader.hh @@ -0,0 +1,33 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_IO_STEREOCHEMICAL_PARAMS_READER_H +#define OST_IO_STEREOCHEMICAL_PARAMS_READER_H + +#include <ost/io/module_config.hh> +#include <ost/mol/alg/local_dist_diff_test.hh> + +namespace ost { namespace io { + +ost::mol::alg::StereoChemicalProps ReadStereoChemicalPropsFile(String filename, bool check=false); +ost::mol::alg::StereoChemicalProps ReadStereoChemicalPropsFile(bool check=false); +String GetStereoChemicalPropsFile(); + +}} // ns + +#endif \ No newline at end of file diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc index 2cfac7fcef03a65e57d20a6658ad07f90b339ab6..e4129c2ab3ccc803a5c2828fb939bbf8f726deb5 100644 --- a/modules/io/tests/test_mmcif_reader.cc +++ 
b/modules/io/tests/test_mmcif_reader.cc @@ -51,7 +51,6 @@ public: using MMCifReader::OnBeginLoop; using MMCifReader::OnEndData; - using MMCifReader::IsValidPDBIdent; using MMCifReader::ParseAtomIdent; using MMCifReader::ParseAndAddAtom; using MMCifReader::ParseEntity; @@ -113,29 +112,6 @@ conop::CompoundLibPtr SetDefaultCompoundLib() { BOOST_AUTO_TEST_SUITE( io ); -BOOST_AUTO_TEST_CASE(mmcif_isvalidpdbident) -{ - mol::EntityHandle eh=mol::CreateEntity(); - - // on changing the tests for a PDB id in mmcif files, extend this unit test - BOOST_TEST_MESSAGE(" Running mmcif_isvalidpdbident tests..."); - std::ifstream s("testfiles/mmcif/atom_site.mmcif"); - TestMMCifReaderProtected tmmcif_p(s, eh); - StringRef id = StringRef("1FOO", 4); - BOOST_TEST_MESSAGE(" Testing valid id ('"+ id.str() +"')..."); - BOOST_CHECK(tmmcif_p.IsValidPDBIdent(id)); - BOOST_TEST_MESSAGE(" done."); - id = StringRef("this is to long for a PDB id", 28); - BOOST_TEST_MESSAGE(" Testing oversized PDB id ('"+ id.str() +"')..."); - BOOST_CHECK(!tmmcif_p.IsValidPDBIdent(id)); - BOOST_TEST_MESSAGE(" done."); - id = StringRef("nFOO", 4); - BOOST_TEST_MESSAGE(" Testing PDB id with missing number ('" - + id.str() + "')..."); - BOOST_CHECK(!tmmcif_p.IsValidPDBIdent(id)); - BOOST_TEST_MESSAGE(" done."); -} - BOOST_AUTO_TEST_CASE(mmcif_trystoreidx) { mol::EntityHandle eh = mol::CreateEntity(); @@ -1327,7 +1303,7 @@ BOOST_AUTO_TEST_CASE(mmcif_test_chain_mappings) BOOST_TEST_MESSAGE(" Running mmcif_test_chain_mappings tests..."); // check compound lib - bool compound_lib_available = SetDefaultCompoundLib(); + bool compound_lib_available = static_cast<bool>(SetDefaultCompoundLib()); // load data mol::EntityHandle eh = mol::CreateEntity(); diff --git a/modules/mol/alg/doc/lddt.rst b/modules/mol/alg/doc/lddt.rst index 3d3e862c3fbc61682eb880fe3e738365b8cb1501..3201243bdaa59551cc73e543e16100c161bfb5f4 100644 --- a/modules/mol/alg/doc/lddt.rst +++ b/modules/mol/alg/doc/lddt.rst @@ -1,6 +1,6 @@ -====== +==== lDDT 
-====== +==== ------------------------------------- Where can I find the lDDT executable? @@ -136,15 +136,13 @@ for bonds and angles respectively. For steric clashes, the lddt executable recovers atomic radii and clashing tolerance distances from the parameter file, depending on the atomic element under -investigation. When an atomic element cannot be determined, the lddt executable -uses a default atomic radius of 1.5 Angstrom. This value can be overriden using -the -m value, passing a new radius (in Ansgstroms) to the program. +investigation. For example: .. code-block:: bash - lddt -f -p stereo_chemical_params.txt -b 8 -a 8 -m 1.0 mdl1.pdb ref.pdb + lddt -f -p stereo_chemical_params.txt -b 8 -a 8 mdl1.pdb ref.pdb ----------------------------- @@ -180,3 +178,89 @@ For example: WARNING: Verbosity levels 1 and 2 can generate a large amount of output text, especially with large structures and multiple models being evaluated. + +=============== +lDDT Python API +=============== + +One can replicate the binary using simple python script: + +.. code-block:: python + + #! 
/bin/env python + """Run lDDT from within script.""" + from ost.io import LoadPDB + from ost.mol.alg import (CleanlDDTReferences, + PreparelDDTGlobalRDMap, + lDDTSettings, + CheckStructure, + LocalDistDiffTest, + GetlDDTPerResidueStats, + PrintlDDTPerResidueStats, + ResidueNamesMatch) + from ost.io import ReadStereoChemicalPropsFile + + model_path = "Path to your model pdb file" + reference_path = "Path to your reference pdb file" + structural_checks = True + bond_tolerance = 12 + angle_tolerance = 12 + cutoffs = [0.5, 1.0, 2.0, 4.0] + # + # Load model and prepare its view + model = LoadPDB(model_path) + model_view = model.GetChainList()[0].Select("peptide=true") + # + # Prepare references - it should be a list of EntityView(s) + references = [LoadPDB(reference_path).CreateFullView()] + # + # Initialize settings with default parameters and print them + settings = lDDTSettings() + settings.PrintParameters() + + # + # Clean up references + CleanlDDTReferences(references) + # + # Prepare residue map from references + rdmap = PreparelDDTGlobalRDMap(references, + cutoffs=cutoffs, + sequence_separation=settings.sequence_separation, + radius=settings.radius) + # + # This part is optional and it depends on our settings parameter + if structural_checks: + stereochemical_parameters = ReadStereoChemicalPropsFile() + CheckStructure(ent=model_view, + bond_table=stereochemical_parameters.bond_table, + angle_table=stereochemical_parameters.angle_table, + nonbonded_table=stereochemical_parameters.nonbonded_table, + bond_tolerance=bond_tolerance, + angle_tolerance=angle_tolerance) + # + # Check consistency + is_cons = ResidueNamesMatch(model_view, references[0], True) + print "Consistency check: ", "OK" if is_cons else "ERROR" + # + # Calculate lDDT + LocalDistDiffTest(model_view, + references, + rdmap, + settings) + # + # Get the local scores + local_scores = GetlDDTPerResidueStats(model_view, + rdmap, + structural_checks, + settings.label) + # + # Print local scores +
PrintlDDTPerResidueStats(local_scores, structural_checks, len(cutoffs)) + +A similar effect can be obtained using lDDTScorer. See :class:`~ost.mol.alg.lDDTScorer` +for a simple example. + + +The Python API can be useful when we already have models and references +read into memory and we do not want to run the binary. +Please refer to the specific function documentation for more details. diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst index 1641b6b495143c263417018e8d86224bfadca955..4636a4ce45ee911c944366545be4bc10947d82ba 100644 --- a/modules/mol/alg/doc/molalg.rst +++ b/modules/mol/alg/doc/molalg.rst @@ -59,6 +59,24 @@ Local Distance Test scores (lDDT, DRMSD) :returns: a tuple containing the counts of the conserved distances in the model and of all the checked distances +.. function:: LocalDistDiffTest(model, reference_list, distance_list, settings) + + Wrapper around :func:`LocalDistDiffTest` above. + + :param model: the model structure + :type model: :class:`~ost.mol.EntityView` + :param reference_list: the list of reference structures from which distances were derived + :type reference_list: :class:`list` of :class:`~ost.mol.EntityView` + :param distance_list: A residue distance map prepared with :func:`PreparelDDTGlobalRDMap` + with *reference_list* and *settings* as parameters. + :type distance_list: :class:`~ost.mol.alg.GlobalRDMap` + :param settings: lDDT settings + :type settings: :class:`~ost.mol.alg.lDDTSettings` + + :returns: the Local Distance Difference Test score (conserved distances + divided by all the checked distances) + :rtype: :class:`float` + .. function:: LocalDistDiffTest(model, target, cutoff, max_dist, \ local_lddt_property_string="") @@ -287,22 +305,356 @@ Local Distance Test scores (lDDT, DRMSD) :returns: :class:`~ost.mol.alg.GlobalRDMap` + +..
function:: PreparelDDTGlobalRDMap(reference_list, cutoff_list, sequence_separation, max_dist) + + A wrapper around :func:`CreateDistanceList` and + :func:`CreateDistanceListFromMultipleReferences`. Depending on the length of + the ``reference_list`` it calls one or the other. + + :param reference_list: a list of reference structures from which distances are + derived + :type reference_list: list of :class:`~ost.mol.EntityView` + :param max_dist: the inclusion radius in Angstroms (to determine which + distances are checked for conservation) + :type max_dist: :class:`float` + :param sequence_separation: sequence separation parameter i.e. maximum distance + between two sequences. + :type sequence_separation: :class:`int` + :returns: :class:`~ost.mol.alg.GlobalRDMap` + + +.. function:: CleanlDDTReferences(reference_list) + + Prepares references to be used in lDDT calculation. It checks if all references + have the same chain name and selects this chain for further calculations. + + .. warning:: + + This function modifies the passed *reference_list* list. + + :param reference_list: A list of reference structures from which distances are + derived + :type reference_list: :class:`list` of :class:`~ost.mol.EntityView` + + +.. function:: CheckStructure(ent, \ + bond_table, \ + angle_table, \ + nonbonded_table, \ + bond_tolerance, \ + angle_tolerance) + + Performs structural checks and filters the structure.
+ + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityView` + :param bond_table: List of bond stereo chemical parameters obtained from + :class:`~ost.io.StereoChemicalParamsReader` or :func:`FillStereoChemicalParams` + :type bond_table: :class:`~ost.mol.alg.StereoChemicalParams` + :param angle_table: List of angle stereo chemical parameters obtained from + :class:`~ost.io.StereoChemicalParamsReader` or :func:`FillStereoChemicalParams` + :type angle_table: :class:`~ost.mol.alg.StereoChemicalParams` + :param nonbonded_table: Information about the clashing distances obtained from + :class:`~ost.io.StereoChemicalParamsReader` or :func:`FillClashingDistances` + :type nonbonded_table: :class:`~ost.mol.alg.ClashingDistances` + :param bond_tolerance: Tolerance in stddev for bonds + :type bond_tolerance: :class:`float` + :param angle_tolerance: Tolerance in stddev for angles + :type angle_tolerance: :class:`float` + + +.. function:: GetlDDTPerResidueStats(model, distance_list, structural_checks, label) + + Get the per-residue statistics from the lDDT calculation. + + :param model: The model structure + :type model: :class:`~ost.mol.EntityHandle` + :param distance_list: The list of distances to check for conservation + :type distance_list: :class:`~ost.mol.alg.GlobalRDMap` + :param structural_checks: Were structural checks performed on the model? + :type structural_checks: :class:`bool` + :param label: Label used for ResidueHandle properties that store the local + scores. + :type label: :class:`str` + :returns: Per-residue local lDDT scores + :rtype: :class:`list` of :class:`~ost.mol.alg.lDDTLocalScore` + + +.. function:: PrintlDDTPerResidueStats(scores, structural_checks, cutoffs_length) + + Print per-residue statistics from lDDT calculation. + + :param scores: Local lDDT scores + :type scores: :class:`list` of :class:`~ost.mol.alg.lDDTLocalScore` + :param structural_checks: Were structural checks performed on the model?
+ :type structural_checks: :class:`bool` + :param cutoffs_length: Length of the cutoffs list used to calculate lDDT + :type cutoffs_length: :class:`int` + + +.. class:: lDDTLocalScore(cname, rname, rnum, is_assessed, quality_problems, \ + local_lddt, conserved_dist, total_dist) + + Object containing per-residue information about calculated lDDT. + + :param cname: Sets :attr:`cname` + :param rname: Sets :attr:`rname` + :param rnum: Sets :attr:`rnum` + :param is_assessed: Sets :attr:`is_assessed` + :param quality_problems: Sets :attr:`quality_problems` + :param local_lddt: Sets :attr:`local_lddt` + :param conserved_dist: Sets :attr:`conserved_dist` + :param total_dist: Sets :attr:`total_dist` + + .. attribute:: cname + + Chain name. + + :type: :class:`str` + + .. attribute:: rname + + Residue name. + + :type: :class:`str` + + .. attribute:: rnum + + Residue number. + + :type: :class:`int` + + .. attribute:: is_assessed + + Is the residue taken into account? Yes or No. + + :type: :class:`str` + + .. attribute:: quality_problems + + Does the residue have quality problems? + No if there are no problems, NA if the problems were not assessed, Yes if + there are sidechain problems and Yes+ if there are backbone problems. + + :type: :class:`str` + + .. attribute:: local_lddt + + Local lDDT score for residue. + + :type: :class:`float` + + .. attribute:: conserved_dist + + Number of conserved distances. + + :type: :class:`int` + + .. attribute:: total_dist + + Total number of distances. + + :type: :class:`int` + + .. method:: ToString(structural_checks) + + :return: String representation of the lDDTLocalScore object. + :rtype: :class:`str` + + :param structural_checks: Were structural checks applied during calculations? + :type structural_checks: bool + + .. method:: GetHeader(structural_checks, cutoffs_length) + + Get the names of the fields as printed by the ToString method. + + :param structural_checks: Were structural checks applied during calculations?
+ :type structural_checks: bool + :param cutoffs_length: Length of the cutoffs list used for calculations + :type cutoffs_length: int + +.. class:: StereoChemicalProps(bond_table, angle_table, nonbonded_table) -.. class:: UniqueAtomIdentifier + Object containing the stereo-chemical properties read from the stereo_chemical_props.txt + file. - Object containing enough information to uniquely identify an atom in a structure + :param bond_table: Sets :attr:`bond_table` + :param angle_table: Sets :attr:`angle_table` + :param nonbonded_table: Sets :attr:`nonbonded_table` - .. method:: UniqueAtomIdentifier(chain,residue_number,residue_name,atom_name) + .. attribute:: bond_table + + Object containing bond parameters + + :type: :class:`~ost.mol.alg.StereoChemicalParams` + + .. attribute:: angle_table + + Object containing angle parameters - Creates an UniqueAtomIdentifier object starting from relevant atom information + :type: :class:`~ost.mol.alg.StereoChemicalParams` + + .. attribute:: nonbonded_table + + Object containing clashing distances parameters + + :type: :class:`~ost.mol.alg.ClashingDistances` + + +.. class:: lDDTSettings(radius=15, \ + sequence_separation=0, \ + cutoffs=(0.5, 1.0, 2.0, 4.0), \ + label="locallddt") + + Object containing the settings used for lDDT calculations. + + :param radius: Sets :attr:`radius`. + :param sequence_separation: Sets :attr:`sequence_separation`. + :param cutoffs: Sets :attr:`cutoffs`. + :param label: Sets :attr:`label`. + + .. attribute:: radius + + Distance inclusion radius. + + :type: :class:`float` + + .. attribute:: sequence_separation + + Sequence separation. + + :type: :class:`int` + + .. attribute:: cutoffs + + List of thresholds used to determine distance conservation. + + :type: :class:`list` of :class:`float` + + .. attribute:: label + + The base name for the ResidueHandle properties that store the local scores. + + :type: :class:`str` + + .. method:: PrintParameters() + + Print settings. + + ..
method:: ToString() + + :return: String representation of the lDDTSettings object. + :rtype: :class:`str` + +.. class:: lDDTScorer(references, model, settings) + + Object to compute lDDT scores. + + Example usage. + + .. code:: python + + #! /bin/env python + """Run lDDT from within script.""" + from ost.io import LoadPDB + from ost.mol.alg import (CleanlDDTReferences, + lDDTSettings, lDDTScorer) + + ent_full = LoadPDB('3ia3', remote=True) + model_view = ent_full.Select('cname=A') + references = [ent_full.Select('cname=C')] + + # + # Initialize settings with default parameters and print them + settings = lDDTSettings() + settings.PrintParameters() + + # Clean up references + CleanlDDTReferences(references) + # + # Calculate lDDT + scorer = lDDTScorer(references=references, model=model_view, settings=settings) + print "Global score:", scorer.global_score + scorer.PrintPerResidueStats() + + :param references: Sets :attr:`references` + :param model: Sets :attr:`model` + :param settings: Sets :attr:`settings` + + .. attribute:: references + + A list of reference structures. + + :type: list(:class:`~ost.mol.EntityView`) + + .. attribute:: model + + A model structure. + + :type: :class:`~ost.mol.EntityView` + + .. attribute:: settings + + Settings used to calculate lDDT. + + :type: :class:`~ost.mol.alg.lDDTSettings` + + .. attribute:: global_dist_list + + Global map of residue properties. + + :type: :class:`~ost.mol.alg.GlobalRDMap` + + .. attribute:: global_score + + Global lDDT score. It is calculated as :attr:`conserved_contacts` divided + by :attr:`total_contacts`. + + :type: float + + .. attribute:: conserved_contacts + + Number of conserved distances. + + :type: int + + .. attribute:: total_contacts + + Number of total distances. + + :type: int + + .. attribute:: local_scores + + Local scores. For each residue, lDDT is calculated as the residue's + conserved contacts divided by the residue's total contacts. + + :type: list(:class:`~ost.mol.alg.lDDTLocalScore`) + + ..
attribute:: is_valid + + Is the calculated score valid? + + :type: bool + + .. method:: PrintPerResidueStats + + Print per-residue statistics. + + +.. class:: UniqueAtomIdentifier(chain, residue_number, residue_name, atom_name) - :param chain: a string containing the name of the chain to which the atom - belongs - :param residue_number: the number of the residue to which the atom belongs - :type residue_number: :class:`~ost.mol.ResNum` - :param residue_name: a string containing the name of the residue to which - the atom belongs - :param atom_name: a string containing the name of the atom + Object containing enough information to uniquely identify an atom in a + structure. + + :param chain: A string containing the name of the chain to which the atom + belongs + :param residue_number: The number of the residue to which the atom belongs + :type residue_number: :class:`~ost.mol.ResNum` + :param residue_name: A string containing the name of the residue to which + the atom belongs + :param atom_name: A string containing the name of the atom .. method:: GetChainName() @@ -957,6 +1309,102 @@ Algorithms on Structures :class:`~ost.mol.EntityHandle` + +.. class:: FindMemParam + + Result object for the membrane detection algorithm described below + + .. attribute:: axis + + initial search axis from which optimal membrane slab could be found + + .. attribute:: tilt_axis + + Axis around which we tilt the membrane starting from the initial axis + + .. attribute:: tilt + + Angle to tilt around tilt axis + + .. attribute:: angle + + After the tilt operation we perform a rotation around the initial axis + with this angle to get the final membrane axis + + .. attribute:: membrane_axis + + The result of applying the tilt and rotation procedure described above. + The membrane_axis is orthogonal to the membrane plane and has unit length. + + .. attribute:: pos + + Real number that describes the membrane center point. To get the actual + position you can do: pos * membrane_axis + + .. 
attribute:: width + + Total width of the membrane in Angstroms + + .. attribute:: energy + + Pseudo energy of the implicit solvation model + + .. attribute:: membrane_representation + + Dummy atoms that represent the membrane. This entity is only valid if + the corresponding flag has been set to True when calling FindMembrane. + + +.. method:: FindMembrane(ent, assign_membrane_representation=True, fast=False) + + Estimates the optimal membrane position of a protein by using an implicit + solvation model. The original algorithm and the used energy function are + described in: Lomize AL, Pogozheva ID, Lomize MA, Mosberg HI (2006) + Positioning of proteins in membranes: A computational approach. + + There are some modifications in this implementation and the procedure is + as follows: + + * Initial axes are constructed that build the starting point for initial + parameter grid searches. + + * For every axis, the protein is rotated so that the axis builds the z-axis + + * In order to exclude internal hydrophilic pores, only the outermost atoms + with respect to the z-axis enter an initial grid search + * The width and position of the membrane are optimized for different + combinations of tilt and rotation angles (further described in + :class:`FindMemParam`). The top 20 parametrizations + (only top parametrization if *fast* is True) are stored for further + processing. + + * The 20 best membrane parametrizations from the initial grid search + (only the best if *fast* is set to True) enter a final + minimization step using a Levenberg-Marquardt minimizer. + + + :param ent: Entity of a transmembrane protein, you'll get weird + results if this is not the case. The energy term + of the result is typically a good indicator whether + *ent* is an actual transmembrane protein.
+ :type ent: :class:`ost.mol.EntityHandle` / :class:`ost.mol.EntityView` + + :param assign_membrane_representation: Whether to construct a membrane + representation using dummy atoms + + :type assign_membrane_representation: :class:`bool` + + :param fast: If set to false, the 20 best results of the initial grid + search undergo a Levenberg-Marquardt minimization and + the parametrization with optimal minimized energy is + returned. + If set to yes, only the best result of the initial grid + search is selected and returned after + Levenberg-Marquardt minimization. + + :returns: The results object + :rtype: :class:`ost.mol.alg.FindMemParam` + .. _traj-analysis: Trajectory Analysis @@ -1187,3 +1635,288 @@ used to skip frames in the analysis. .. automodule:: ost.mol.alg.structure_analysis :members: + +.. _mapping-functions: + +Mapping functions +-------------------------------------------------------------------------------- + +.. currentmodule:: ost.mol.alg + +The following functions help to convert one residue into another by reusing as +much as possible from the present atoms. They are mainly meant to map from +standard amino acid to other standard amino acids or from modified amino acids +to standard amino acids. + +.. function:: CopyResidue(src_res, dst_res, editor) + + Copies the atoms of ``src_res`` to ``dst_res`` using the residue names + as guide to decide which of the atoms should be copied. If ``src_res`` and + ``dst_res`` have the same name, or ``src_res`` is a modified version of + ``dst_res`` (i.e. have the same single letter code), CopyConserved will be + called, otherwise CopyNonConserved will be called. + + :param src_res: The source residue + :type src_res: :class:`~ost.mol.ResidueHandle` + :param dst_res: The destination residue + :type dst_res: :class:`~ost.mol.ResidueHandle` + :param editor: Editor used to modify *dst_res*. + :type editor: :class:`~ost.mol.XCSEditor` + + :returns: True if the residue could be copied, False if not. + +.. 
function:: CopyConserved(src_res, dst_res, editor) + + Copies the atoms of ``src_res`` to ``dst_res`` assuming that the parent + amino acid of ``src_res`` (or ``src_res`` itself) are identical to ``dst_res``. + + If ``src_res`` and ``dst_res`` are identical, all heavy atoms are copied + to ``dst_res``. If ``src_res`` is a modified version of ``dst_res`` and the + modification is a pure addition (e.g. the phosphate group of phosphoserine), + the modification is stripped off and all other heavy atoms are copied to + ``dst_res``. If the modification is not a pure addition, only the backbone + heavy atoms are copied to ``dst_res``. + + Additionally, the selenium atom of ``MSE`` is converted to sulphur. + + :param src_res: The source residue + :type src_res: :class:`~ost.mol.ResidueHandle` + :param dst_res: The destination residue + :type dst_res: :class:`~ost.mol.ResidueHandle` + :param editor: Editor used to modify *dst_res*. + :type editor: :class:`~ost.mol.XCSEditor` + + :returns: A tuple of bools stating whether the residue could be copied and + whether the Cbeta atom was inserted into the ``dst_res``. + +.. function:: CopyNonConserved(src_res, dst_res, editor) + + Copies the heavy backbone atoms and Cbeta (except for ``GLY``) of ``src_res`` + to ``dst_res``. + + :param src_res: The source residue + :type src_res: :class:`~ost.mol.ResidueHandle` + :param dst_res: The destination residue + :type dst_res: :class:`~ost.mol.ResidueHandle` + :param editor: Editor used to modify *dst_res*. + :type editor: :class:`~ost.mol.XCSEditor` + + :returns: A tuple of bools stating whether the residue could be copied and + whether the Cbeta atom was inserted into the ``dst_res``. + + +Molecular Checker (Molck) +-------------------------------------------------------------------------------- + +Programmatic usage +################## + +Molecular Checker (Molck) could be called directly from the code using Molck +function: + +.. code-block:: python + + #! 
/bin/env python + + """Run Molck with Python API. + + + This is an exemplary procedure on how to run Molck using Python API which is + equivalent to the command line: + + molck <PDB PATH> --rm=hyd,oxt,nonstd,unk \ + --fix-ele --out=<OUTPUT PATH> \ + --complib=<PATH TO compounds.chemlib> + """ + + from ost.io import LoadPDB, SavePDB + from ost.mol.alg import MolckSettings, Molck + + from ost.conop import CompoundLib + + + pdbid = "<PDB PATH>" + lib = CompoundLib.Load("<PATH TO compounds.chemlib>") + + # Using Molck function + ent = LoadPDB(pdbid) + ms = MolckSettings(rm_unk_atoms=True, + rm_non_std=True, + rm_hyd_atoms=True, + rm_oxt_atoms=True, + rm_zero_occ_atoms=False, + colored=False, + map_nonstd_res=False, + assign_elem=True) + Molck(ent, lib, ms) + SavePDB(ent, "<OUTPUT PATH>") + +It can also be split into subsequent commands for greater control: + +.. code-block:: python + + #! /bin/env python + + """Run Molck with Python API. + + + This is an exemplary procedure on how to run Molck using Python API which is + equivalent to the command line: + + molck <PDB PATH> --rm=hyd,oxt,nonstd,unk \ + --fix-ele --out=<OUTPUT PATH> \ + --complib=<PATH TO compounds.chemlib> + """ + + from ost.io import LoadPDB, SavePDB + from ost.mol.alg import (RemoveAtoms, MapNonStandardResidues, + CleanUpElementColumn) + from ost.conop import CompoundLib + + + pdbid = "<PDB PATH>" + lib = CompoundLib.Load("<PATH TO compounds.chemlib>") + map_nonstd = False + + # Using function chain + ent = LoadPDB(pdbid) + if map_nonstd: + MapNonStandardResidues(lib=lib, ent=ent) + + RemoveAtoms(lib=lib, + ent=ent, + rm_unk_atoms=True, + rm_non_std=True, + rm_hyd_atoms=True, + rm_oxt_atoms=True, + rm_zero_occ_atoms=False, + colored=False) + + CleanUpElementColumn(lib=lib, ent=ent) + SavePDB(ent, "<OUTPUT PATH>") + +API +### + +..
class:: MolckSettings(rm_unk_atoms=False, rm_non_std=False, \ + rm_hyd_atoms=True, rm_oxt_atoms=False, \ + rm_zero_occ_atoms=False, colored=False, \ + map_nonstd_res=True, assign_elem=True) + + Stores settings used for Molecular Checker. + + :param rm_unk_atoms: Sets :attr:`rm_unk_atoms`. + :param rm_non_std: Sets :attr:`rm_non_std`. + :param rm_hyd_atoms: Sets :attr:`rm_hyd_atoms`. + :param rm_oxt_atoms: Sets :attr:`rm_oxt_atoms`. + :param rm_zero_occ_atoms: Sets :attr:`rm_zero_occ_atoms`. + :param colored: Sets :attr:`colored`. + :param map_nonstd_res: Sets :attr:`map_nonstd_res`. + :param assign_elem: Sets :attr:`assign_elem`. + + .. attribute:: rm_unk_atoms + + Remove unknown and atoms not following the nomenclature. + + :type: :class:`bool` + + .. attribute:: rm_non_std + + Remove all residues not one of the 20 standard amino acids + + :type: :class:`bool` + + .. attribute:: rm_hyd_atoms + + Remove hydrogen atoms + + :type: :class:`bool` + + .. attribute:: rm_oxt_atoms + + Remove terminal oxygens + + :type: :class:`bool` + + .. attribute:: rm_zero_occ_atoms + + Remove atoms with zero occupancy + + :type: :class:`bool` + + .. attribute:: colored + + Whether output should be colored + + :type: :class:`bool` + + .. attribute:: map_nonstd_res + + Maps modified residues back to the parent amino acid, for example + MSE -> MET, SEP -> SER + + :type: :class:`bool` + + .. attribute:: assign_elem + + Clean up element column + + :type: :class:`bool` + + + .. method:: ToString() + + :return: String representation of the MolckSettings. + :rtype: :class:`str` + +.. warning:: + + The API here is set such that the functions modify the passed structure *ent* + in-place. If this is not ok, please work on a copy of the structure. + +.. function:: Molck(ent, lib, settings) + + Runs Molck on provided entity. 
+ + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` + :param settings: Molck settings + :type settings: :class:`MolckSettings` + + +.. function:: MapNonStandardResidues(ent, lib) + + Maps modified residues back to the parent amino acid, for example MSE -> MET. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` + +.. function:: RemoveAtoms(ent, lib, rm_unk_atoms=False, rm_non_std=False, \ + rm_hyd_atoms=True, rm_oxt_atoms=False, \ + rm_zero_occ_atoms=False, colored=False) + + Removes atoms and residues according to some criteria. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` + :param rm_unk_atoms: See :attr:`MolckSettings.rm_unk_atoms` + :param rm_non_std: See :attr:`MolckSettings.rm_non_std` + :param rm_hyd_atoms: See :attr:`MolckSettings.rm_hyd_atoms` + :param rm_oxt_atoms: See :attr:`MolckSettings.rm_oxt_atoms` + :param rm_zero_occ_atoms: See :attr:`MolckSettings.rm_zero_occ_atoms` + :param colored: See :attr:`MolckSettings.colored` + +.. function:: CleanUpElementColumn(ent, lib) + + Clean up element column. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` diff --git a/modules/mol/alg/doc/molck.rst b/modules/mol/alg/doc/molck.rst new file mode 100644 index 0000000000000000000000000000000000000000..1dfaf6addeb0eca91744505ad34d9efb4a0eb6a2 --- /dev/null +++ b/modules/mol/alg/doc/molck.rst @@ -0,0 +1,66 @@ +========================= +Molecular Checker (Molck) +========================= + +-------------------------------------- +Where can I find the Molck executable? 
+-------------------------------------- + +The Molck executable can be found at <YOUR-OST-STAGE-DIR>/bin + +----------- +Basic Usage +----------- + +To check one PDB file (struc1.pdb) with Molck, use the following command: + +.. code-block:: bash + + molck --complib <PATH TO COMPOUND LIB> struc1.pdb + +The checked and cleaned file will be saved by default as struc1-molcked.pdb. + +Similarly it is possible to check a list of PDB files: + +.. code-block:: bash + + molck --complib <PATH TO COMPOUND LIB> struc1.pdb struc2.pdb struc3.pdb + + +----------- +All Options +----------- + +The molck executable supports several other command line options, +which are listed below: + +.. code-block:: bash + + usage: molck [options] file1.pdb [file2.pdb [...]] + options + --complib=path location of the compound library file. If not provided, the + following locations are searched in this order: + 1. Working directory, + 2. OpenStructure standard library location (if the + executable is part of a standard OpenStructure installation) + --rm=<a>,<b> remove atoms and residues matching some criteria: + - zeroocc - Remove atoms with zero occupancy + - hyd - Remove hydrogen atoms + - oxt - Remove terminal oxygens + - nonstd - Remove all residues not one of the 20 standard amino acids + - unk - Remove unknown and atoms not following the nomenclature + --fix-ele clean up element column + --stdout write cleaned file(s) to stdout + --out=filename write cleaned file(s) to disk. % characters in the filename are + replaced with the basename of the input file without extension. + Default: %-molcked.pdb + --color=auto|on|off whether output should be colored + --map-nonstd maps modified residues back to the parent amino acid, for example + MSE -> MET, SEP -> SER. + +================ +Molck Python API +================ + +Within OST, one can also call the :func:`~ost.mol.alg.Molck` function directly +on entities to get the same effect as with the binary.
diff --git a/modules/mol/alg/pymod/CMakeLists.txt b/modules/mol/alg/pymod/CMakeLists.txt index fa2942575835fd70e480563148784918bad58122..f4d1fdb71b68519b303d5df13dea34a2d529c230 100644 --- a/modules/mol/alg/pymod/CMakeLists.txt +++ b/modules/mol/alg/pymod/CMakeLists.txt @@ -7,6 +7,9 @@ set(OST_MOL_ALG_PYMOD_SOURCES export_contact_overlap.cc export_accessibility.cc export_sec_structure.cc + export_non_standard.cc + export_molck.cc + export_membrane.cc ) set(OST_MOL_ALG_PYMOD_MODULES diff --git a/modules/mol/alg/pymod/export_membrane.cc b/modules/mol/alg/pymod/export_membrane.cc new file mode 100644 index 0000000000000000000000000000000000000000..fe8ebe52de9e43c2a98499718f1951015794453f --- /dev/null +++ b/modules/mol/alg/pymod/export_membrane.cc @@ -0,0 +1,59 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2017 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. 
+// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + + +#include <boost/python.hpp> + +#include <ost/mol/alg/find_membrane.hh> + +using namespace boost::python; + +namespace{ + ost::mol::alg::FindMemParam FindMembraneView(ost::mol::EntityView& v, + bool assign_membrane_representation, + bool fast) { + return ost::mol::alg::FindMembrane(v, assign_membrane_representation, fast); + } + + ost::mol::alg::FindMemParam FindMembraneHandle(ost::mol::EntityHandle& h, + bool assign_membrane_representation, + bool fast) { + return ost::mol::alg::FindMembrane(h, assign_membrane_representation, fast); + } +} + +void export_find_membrane() { + + class_<ost::mol::alg::FindMemParam>("FindMemParam", no_init) + .def_readonly("tilt", &ost::mol::alg::FindMemParam::tilt) + .def_readonly("angle", &ost::mol::alg::FindMemParam::angle) + .def_readonly("width", &ost::mol::alg::FindMemParam::width) + .def_readonly("pos", &ost::mol::alg::FindMemParam::pos) + .def_readonly("energy", &ost::mol::alg::FindMemParam::energy) + .def_readonly("axis", &ost::mol::alg::FindMemParam::axis) + .def_readonly("tilt_axis", &ost::mol::alg::FindMemParam::tilt_axis) + .def_readonly("membrane_axis", &ost::mol::alg::FindMemParam::GetMembraneAxis) + .def_readonly("membrane_representation", &ost::mol::alg::FindMemParam::membrane_representation) + ; + + def("FindMembrane", FindMembraneView, (arg("ent"), arg("assign_membrane_representation")=true, + arg("fast")=false)); + def("FindMembrane", FindMembraneHandle, (arg("ent"), arg("assign_membrane_representation")=true, + arg("fast")=false)); +} diff --git a/modules/mol/alg/pymod/export_molck.cc b/modules/mol/alg/pymod/export_molck.cc new file mode 100644 index 
0000000000000000000000000000000000000000..e5e27dac593ebaf321c1b2fd5558ff53f209f543 --- /dev/null +++ b/modules/mol/alg/pymod/export_molck.cc @@ -0,0 +1,137 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#include <stdexcept> +#include <boost/python.hpp> +#include <boost/python/raw_function.hpp> + +using namespace boost::python; + +#include <ost/mol/alg/molck.hh> + +using namespace ost::mol::alg; + +namespace { + +object MolckSettingsInitWrapper(tuple args, dict kwargs){ + + object self = args[0]; + args = tuple(args.slice(1,_)); + + bool rm_unk_atoms = false; + if(kwargs.contains("rm_unk_atoms")){ + rm_unk_atoms = extract<bool>(kwargs["rm_unk_atoms"]); + kwargs["rm_unk_atoms"].del(); + } + + bool rm_non_std = false; + if(kwargs.contains("rm_non_std")){ + rm_non_std = extract<bool>(kwargs["rm_non_std"]); + kwargs["rm_non_std"].del(); + } + + bool rm_hyd_atoms = true; + if(kwargs.contains("rm_hyd_atoms")){ + rm_hyd_atoms = extract<bool>(kwargs["rm_hyd_atoms"]); + 
kwargs["rm_hyd_atoms"].del(); + } + + bool rm_oxt_atoms = false; + if(kwargs.contains("rm_oxt_atoms")){ + rm_oxt_atoms = extract<bool>(kwargs["rm_oxt_atoms"]); + kwargs["rm_oxt_atoms"].del(); + } + + bool rm_zero_occ_atoms = false; + if(kwargs.contains("rm_zero_occ_atoms")){ + rm_zero_occ_atoms = extract<bool>(kwargs["rm_zero_occ_atoms"]); + kwargs["rm_zero_occ_atoms"].del(); + } + + bool colored = false; + if(kwargs.contains("colored")){ + colored = extract<bool>(kwargs["colored"]); + kwargs["colored"].del(); + } + + bool map_nonstd_res = true; + if(kwargs.contains("map_nonstd_res")){ + map_nonstd_res = extract<bool>(kwargs["map_nonstd_res"]); + kwargs["map_nonstd_res"].del(); + } + + bool assign_elem = true; + if(kwargs.contains("assign_elem")){ + assign_elem = extract<bool>(kwargs["assign_elem"]); + kwargs["assign_elem"].del(); + } + + if(len(kwargs) > 0){ + std::stringstream ss; + ss << "Invalid keywords observed when setting up MolckSettings! "; + ss << "Or did you pass the same keyword twice? 
"; + ss << "Valid keywords are: rm_unk_atoms, rm_non_std, rm_hyd_atoms, "; + ss << "rm_oxt_atoms, rm_zero_occ_atoms, colored, map_nonstd_res, "; + ss << "assign_elem!"; + throw std::invalid_argument(ss.str()); + } + + + return self.attr("__init__")(rm_unk_atoms, + rm_non_std, + rm_hyd_atoms, + rm_oxt_atoms, + rm_zero_occ_atoms, + colored, + map_nonstd_res, + assign_elem); +}} + +void export_Molck() +{ + class_<MolckSettings>("MolckSettings", no_init) + .def("__init__", raw_function(MolckSettingsInitWrapper)) + .def(init<bool , bool, bool, bool, bool, bool, bool, bool>()) + .def("ToString", &MolckSettings::ToString) + .def("__repr__", &MolckSettings::ToString) + .def("__str__", &MolckSettings::ToString) + .def_readwrite("rm_unk_atoms", &MolckSettings::rm_unk_atoms) + .def_readwrite("rm_non_std", &MolckSettings::rm_non_std) + .def_readwrite("rm_hyd_atoms", &MolckSettings::rm_hyd_atoms) + .def_readwrite("rm_oxt_atoms", &MolckSettings::rm_oxt_atoms) + .def_readwrite("rm_zero_occ_atoms", &MolckSettings::rm_zero_occ_atoms) + .def_readwrite("colored", &MolckSettings::colored) + .def_readwrite("map_nonstd_res", &MolckSettings::map_nonstd_res) + .def_readwrite("assign_elem", &MolckSettings::assign_elem); + + def("MapNonStandardResidues", &MapNonStandardResidues, (arg("ent"), + arg("lib"))); + + def("RemoveAtoms", &RemoveAtoms, (arg("ent"), + arg("lib"), + arg("rm_unk_atoms")=false, + arg("rm_non_std")=false, + arg("rm_hyd_atoms")=true, + arg("rm_oxt_atoms")=false, + arg("rm_zero_occ_atoms")=false, + arg("colored")=false)); + + def("CleanUpElementColumn", &CleanUpElementColumn, (arg("ent"), arg("lib"))); + + def("Molck", &Molck, (arg("ent"), arg("lib"), arg("settings"))); +} diff --git a/modules/conop/pymod/export_non_standard.cc b/modules/mol/alg/pymod/export_non_standard.cc similarity index 96% rename from modules/conop/pymod/export_non_standard.cc rename to modules/mol/alg/pymod/export_non_standard.cc index 
50b1fd2d6e169e31ff5e192e03b19b98541bf89d..8d485b169ecbd290483daf5f0ab5ca3e06e8da80 100644 --- a/modules/conop/pymod/export_non_standard.cc +++ b/modules/mol/alg/pymod/export_non_standard.cc @@ -18,11 +18,11 @@ //------------------------------------------------------------------------------ #include <boost/python.hpp> #include <ost/mol/mol.hh> -#include <ost/conop/nonstandard.hh> +#include <ost/mol/alg/nonstandard.hh> using namespace boost::python; -using namespace ost::conop; +using namespace ost::mol::alg; using namespace ost::mol; object copy_conserved_handle(ResidueHandle src_res, ResidueHandle dst_res, diff --git a/modules/mol/alg/pymod/qsscoring.py b/modules/mol/alg/pymod/qsscoring.py index 16635d7025e330345dfc20109cf19fd63ffc4257..228fbb43208b9fee2b1b763275636e6a64849945 100644 --- a/modules/mol/alg/pymod/qsscoring.py +++ b/modules/mol/alg/pymod/qsscoring.py @@ -1,5 +1,6 @@ """ -Scoring of quaternary structures as in Martino's 2017 paper. +Scoring of quaternary structures (QS). The QS scoring is according to the paper +by `Bertoni et al. <https://dx.doi.org/10.1038/s41598-017-09654-8>`_. .. note :: @@ -16,9 +17,11 @@ Scoring of quaternary structures as in Martino's 2017 paper. Authors: Gerardo Tauriello, Martino Bertoni """ -from ost import mol, geom, conop, seq, settings +from ost import mol, geom, conop, seq, settings, PushVerbosityLevel from ost import LogError, LogWarning, LogScript, LogInfo, LogVerbose, LogDebug from ost.bindings.clustalw import ClustalW +from ost.mol.alg import lDDTScorer +from ost.seq.alg.renumber import Renumber import numpy as np from scipy.misc import factorial from scipy.special import binom @@ -89,13 +92,27 @@ class QSscorer: -> weight_extra_mapped = sum(w(d)) for all mapped but non-shared -> weight_extra_all = sum(w(d)) for all non-shared -> w(d) = 1 if d <= 5, exp(-2 * ((d-5.0)/4.28)^2) else - + + In the formulas above: + + * "d": CA/CB-CA/CB distance of an "inter-chain contact" ("d1", "d2" for + "shared" contacts). 
+ * "mapped": we could map chains of two structures and align residues in + :attr:`alignments`. + * "shared": pairs of residues which are "mapped" and have + "inter-chain contact" in both structures. + * "inter-chain contact": CB-CB pairs (CA for GLY) with distance <= 12 A + (fallback to CA-CA if :attr:`calpha_only` is True). + * "w(d)": weighting function (prob. of 2 res. to interact given CB distance) + from `Xu et al. 2009 <https://dx.doi.org/10.1016%2Fj.jmb.2008.06.002>`_. + :param ent_1: First structure to be scored. :type ent_1: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView` :param ent_2: Second structure to be scored. :type ent_2: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView` + :param res_num_alignment: Sets :attr:`res_num_alignment` :raises: :class:`QSscoreError` if input structures are invalid or are monomers or have issues that make it impossible for a QS score to be computed. @@ -125,8 +142,15 @@ class QSscorer: of symmetries and chain mappings. By default it is set to 100. :type: :class:`int` + + .. attribute:: res_num_alignment + + Forces each alignment in :attr:`alignments` to be based on residue numbers + instead of using a global BLOSUM62-based alignment. + + :type: :class:`bool` """ - def __init__(self, ent_1, ent_2): + def __init__(self, ent_1, ent_2, res_num_alignment=False): # generate QSscoreEntity objects? 
if isinstance(ent_1, QSscoreEntity): self.qs_ent_1 = ent_1 @@ -147,6 +171,7 @@ class QSscorer: self.qs_ent_1.SetName(self.qs_ent_1.original_name) self.qs_ent_2.SetName(self.qs_ent_2.original_name) # set other public attributes + self.res_num_alignment = res_num_alignment self.calpha_only = self.qs_ent_1.calpha_only or self.qs_ent_2.calpha_only self.max_ca_per_chain_for_cm = 100 # init cached stuff @@ -156,14 +181,12 @@ class QSscorer: self._symm_1 = None self._symm_2 = None self._chain_mapping = None + self._chain_mapping_scheme = None self._alignments = None self._mapped_residues = None self._global_score = None self._best_score = None self._superposition = None - self._lddt_score = None - self._lddt_mdl = None - self._lddt_ref = None self._clustalw_bin = None @property @@ -185,9 +208,8 @@ class QSscorer: :attr:`qs_ent_1` and value = :class:`tuple` of chain names in :attr:`qs_ent_2`. - :raises: :class:`QSscoreError` if we end up having less than 2 chains for - either entity in the mapping (can happen if chains do not have CA - atoms). + :raises: :class:`QSscoreError` if we end up having no chains for either + entity in the mapping (can happen if chains do not have CA atoms). """ if self._chem_mapping is None: self._chem_mapping = _GetChemGroupsMapping(self.qs_ent_1, self.qs_ent_2) @@ -342,12 +364,41 @@ class QSscorer: to find a chain mapping. """ if self._chain_mapping is None: - self._chain_mapping = _GetChainMapping(self.ent_to_cm_1, self.ent_to_cm_2, - self.symm_1, self.symm_2, - self.chem_mapping) + self._chain_mapping, self._chain_mapping_scheme = \ + _GetChainMapping(self.ent_to_cm_1, self.ent_to_cm_2, self.symm_1, + self.symm_2, self.chem_mapping) LogInfo('Mapping found: %s' % str(self._chain_mapping)) return self._chain_mapping + @property + def chain_mapping_scheme(self): + """Mapping scheme used to get :attr:`chain_mapping`. + + Possible values: + + - 'strict': 80% overlap needed within 4 Angstrom (overlap based mapping). 
+ - 'tolerant': 40% overlap needed within 6 Angstrom (overlap based mapping). + - 'permissive': 20% overlap needed within 8 Angstrom (overlap based + mapping). It's best if you check mapping manually! + - 'extensive': Extensive search used for mapping detection (fallback). This + approach has known limitations and may be removed in future versions. + Mapping should be checked manually! + - 'user': :attr:`chain_mapping` was set by user before first use of this + attribute. + + :getter: Computed with :attr:`chain_mapping` on first use (cached) + :type: :class:`str` + :raises: :class:`QSscoreError` as in :attr:`chain_mapping`. + """ + if self._chain_mapping_scheme is None: + # default: user provided + self._chain_mapping_scheme = 'user' + # get chain mapping and make sure internal variable is set + # -> will not compute and only update _chain_mapping if user provided + # -> will compute and overwrite _chain_mapping_scheme else + self._chain_mapping = self.chain_mapping + return self._chain_mapping_scheme + @property def alignments(self): """List of successful sequence alignments using :attr:`chain_mapping`. @@ -355,8 +406,15 @@ class QSscorer: There will be one alignment for each mapped chain and they are ordered by their chain names in :attr:`qs_ent_1`. - The sequences of the alignments have views attached into - :attr:`QSscoreEntity.ent` of :attr:`qs_ent_1` and :attr:`qs_ent_2`. + The first sequence of each alignment belongs to :attr:`qs_ent_1` and the + second one to :attr:`qs_ent_2`. The sequences are named according to the + mapped chain names and have views attached into :attr:`QSscoreEntity.ent` + of :attr:`qs_ent_1` and :attr:`qs_ent_2`. + + If :attr:`res_num_alignment` is False, each alignment is performed using a + global BLOSUM62-based alignment. Otherwise, the positions in the alignment + sequences are simply given by the residue number so that residues with + matching numbers are aligned. 
:getter: Computed on first use (cached) :type: :class:`list` of :class:`~ost.seq.AlignmentHandle` @@ -364,7 +422,8 @@ class QSscorer: if self._alignments is None: self._alignments = _GetMappedAlignments(self.qs_ent_1.ent, self.qs_ent_2.ent, - self.chain_mapping) + self.chain_mapping, + self.res_num_alignment) return self._alignments @property @@ -394,6 +453,7 @@ class QSscorer: :getter: Computed on first use (cached) :type: :class:`float` + :raises: :class:`QSscoreError` if only one chain is mapped """ if self._global_score is None: self._ComputeScores() @@ -409,6 +469,7 @@ class QSscorer: :getter: Computed on first use (cached) :type: :class:`float` + :raises: :class:`QSscoreError` if only one chain is mapped """ if self._best_score is None: self._ComputeScores() @@ -434,65 +495,6 @@ class QSscorer: % (cmp_view.residue_count, cmp_view.chain_count, sup_rmsd)) return self._superposition - @property - def lddt_score(self): - """The multi-chain lDDT score. - - .. note:: - - lDDT is not considering over-prediction (i.e. extra chains) and hence is - not symmetric. Here, we consider :attr:`qs_ent_1` as the reference and - :attr:`qs_ent_2` as the model. The alignments from :attr:`alignments` are - used to map residue numbers and chains. - - The score is computed with OST's :func:`~ost.mol.alg.LocalDistDiffTest` - function with a single distance threshold of 2 A and an inclusion radius of - 8 A. You can use :attr:`lddt_mdl` and :attr:`lddt_ref` to get entities on - which you can call any other lDDT function with any other set of parameters. - - :getter: Computed on first use (cached) - :type: :class:`float` - """ - if self._lddt_score is None: - self._ComputeLDDT() - return self._lddt_score - - @property - def lddt_mdl(self): - """The model entity used for lDDT scoring (:attr:`lddt_score`) and annotated - with local scores. - - Local scores are available as residue properties named 'lddt' and on each - atom as a B-factor. 
Only CA atoms are considered if :attr:`calpha_only` is - True, otherwise this is an all-atom score. - - Since, the lDDT computation requires a single chain with mapped residue - numbering, all chains are appended into a single chain X with unique residue - numbers according to the column-index in the alignment. The alignments are - in the same order as they appear in :attr:`alignments`. Additional residues - are appended at the end of the chain with unique residue numbers. - - :getter: Computed on first use (cached) - :type: :class:`~ost.mol.EntityHandle` - """ - if self._lddt_mdl is None: - self._ComputeLDDT() - return self._lddt_mdl - - @property - def lddt_ref(self): - """The reference entity used for lDDT scoring (:attr:`lddt_score`). - - This is a single chain X with residue numbers matching ones in - :attr:`lddt_mdl` where aligned and unique numbers for additional residues. - - :getter: Computed on first use (cached) - :type: :class:`~ost.mol.EntityHandle` - """ - if self._lddt_ref is None: - self._ComputeLDDT() - return self._lddt_ref - @property def clustalw_bin(self): """ @@ -506,6 +508,21 @@ class QSscorer: self._clustalw_bin = settings.Locate(('clustalw', 'clustalw2')) return self._clustalw_bin + def GetOligoLDDTScorer(self, settings, penalize_extra_chains=True): + """ + :return: :class:`OligoLDDTScorer` object, setup for this QS scoring problem. + :param settings: Passed to :class:`OligoLDDTScorer` constructor. + :param penalize_extra_chains: Passed to :class:`OligoLDDTScorer` constructor. 
+ """ + if penalize_extra_chains: + return OligoLDDTScorer(self.qs_ent_1.ent, self.qs_ent_2.ent, + self.alignments, self.calpha_only, settings, + True, self.chem_mapping) + else: + return OligoLDDTScorer(self.qs_ent_1.ent, self.qs_ent_2.ent, + self.alignments, self.calpha_only, settings, False) + + ############################################################################## # Class internal helpers (anything that doesnt easily work without this class) ############################################################################## @@ -537,6 +554,8 @@ class QSscorer: def _ComputeScores(self): """Fills cached global_score and best_score.""" + if len(self.chain_mapping) < 2: + raise QSscoreError("QS-score is not defined for monomers") # get contacts if self.calpha_only: contacts_1 = self.qs_ent_1.contacts_ca @@ -554,22 +573,6 @@ class QSscorer: % (self.qs_ent_1.GetName(), self.qs_ent_2.GetName(), self._best_score, self._global_score)) - def _ComputeLDDT(self): - """Fills cached lddt_score, lddt_mdl and lddt_ref.""" - LogInfo('Computing lDDT score') - # check reference and model - ref, mdl = self.qs_ent_1.ent, self.qs_ent_2.ent - LogInfo('Reference %s has: %s chains' % (ref.GetName(), ref.chain_count)) - LogInfo('Model %s has: %s chains' % (mdl.GetName(), mdl.chain_count)) - if mdl.chain_count > ref.chain_count: - LogWarning('MODEL contains more chains than REFERENCE, ' - 'lDDT is not considering them') - # get single chain reference and model - self._lddt_ref, self._lddt_mdl = \ - _MergeAlignedChains(self.alignments, ref, mdl, self.calpha_only) - # score them (mdl and ref changed) and keep results - self._lddt_score = _ComputeLDDTScore(self._lddt_ref, self._lddt_mdl) - ############################################################################### # Entity with cached entries for QS scoring @@ -639,9 +642,8 @@ class QSscoreEntity(object): 'removing water, ligands and small peptides.') self.is_valid = False elif self.ent.chain_count == 1: - LogError('Structure ' + 
ent.GetName() + ' is a monomer. ' - 'QSscore is not defined for monomers.') - self.is_valid = False + LogWarning('Structure ' + ent.GetName() + ' is a monomer.') + self.is_valid = True else: self.is_valid = True # init cached stuff @@ -898,6 +900,528 @@ def GetContacts(entity, calpha_only, dist_thr=12.0): # DONE return contacts +############################################################################### +# Oligo-lDDT scores +############################################################################### + +class OligoLDDTScorer(object): + """Helper class to calculate oligomeric lDDT scores. + + This class can be used independently, but commonly it will be created by + calling :func:`QSscorer.GetOligoLDDTScorer`. + + .. note:: + + By construction, lDDT scores are not symmetric and hence it matters which + structure is the reference (:attr:`ref`) and which one is the model + (:attr:`mdl`). Extra residues in the model are generally not considered. + Extra chains in both model and reference can be considered by setting the + :attr:`penalize_extra_chains` flag to True. + + :param ref: Sets :attr:`ref` + :param mdl: Sets :attr:`mdl` + :param alignments: Sets :attr:`alignments` + :param calpha_only: Sets :attr:`calpha_only` + :param settings: Sets :attr:`settings` + :param penalize_extra_chains: Sets :attr:`penalize_extra_chains` + :param chem_mapping: Sets :attr:`chem_mapping`. Must be given if + *penalize_extra_chains* is True. + + .. attribute:: ref + mdl + + Full reference/model entity to be scored. The entity must contain all chains + mapped in :attr:`alignments` and may also contain additional ones which are + considered if :attr:`penalize_extra_chains` is True. + + :type: :class:`~ost.mol.EntityHandle` + + .. attribute:: alignments + + One alignment for each mapped chain of :attr:`ref`/:attr:`mdl` as defined in + :attr:`QSscorer.alignments`. The first sequence of each alignment belongs to + :attr:`ref` and the second one to :attr:`mdl`. 
Sequences must have sequence + naming and attached views as defined in :attr:`QSscorer.alignments`. + + :type: :class:`list` of :class:`~ost.seq.AlignmentHandle` + + .. attribute:: calpha_only + + If True, restricts lDDT score to CA only. + + :type: :class:`bool` + + .. attribute:: settings + + Settings to use for lDDT scoring. + + :type: :class:`~ost.mol.alg.lDDTSettings` + + .. attribute:: penalize_extra_chains + + If True, extra chains in both :attr:`ref` and :attr:`mdl` will penalize the + lDDT scores. + + :type: :class:`bool` + + .. attribute:: chem_mapping + + Inter-complex mapping of chemical groups as defined in + :attr:`QSscorer.chem_mapping`. Used to find "chem-mapped" chains in + :attr:`ref` for unmapped chains in :attr:`mdl` when penalizing scores. + Each unmapped model chain can add extra reference-contacts according to the + average total contacts of each single "chem-mapped" reference chain. If + there is no "chem-mapped" reference chain, a warning is shown and the model + chain is ignored. + + + Only relevant if :attr:`penalize_extra_chains` is True. + + :type: :class:`dict` with key = :class:`tuple` of chain names in + :attr:`ref` and value = :class:`tuple` of chain names in + :attr:`mdl`. 
+ """ + + # NOTE: one could also allow computation of both penalized and unpenalized + # in same object -> must regenerate lddt_ref / lddt_mdl though + + def __init__(self, ref, mdl, alignments, calpha_only, settings, + penalize_extra_chains=False, chem_mapping=None): + # sanity checks + if chem_mapping is None and penalize_extra_chains: + raise RuntimeError("Must provide chem_mapping when requesting penalty " + "for extra chains!") + if not penalize_extra_chains: + # warn for unmapped model chains + unmapped_mdl_chains = self._GetUnmappedMdlChains(mdl, alignments) + if unmapped_mdl_chains: + LogWarning('MODEL contains chains unmapped to REFERENCE, ' + 'lDDT is not considering MODEL chains %s' \ + % str(list(unmapped_mdl_chains))) + # warn for unmapped reference chains + ref_chains = set(ch.name for ch in ref.chains) + mapped_ref_chains = set(aln.GetSequence(0).GetName() for aln in alignments) + unmapped_ref_chains = (ref_chains - mapped_ref_chains) + if unmapped_ref_chains: + LogWarning('REFERENCE contains chains unmapped to MODEL, ' + 'lDDT is not considering REFERENCE chains %s' \ + % str(list(unmapped_ref_chains))) + # prepare fields + self.ref = ref + self.mdl = mdl + self.alignments = alignments + self.calpha_only = calpha_only + self.settings = settings + self.penalize_extra_chains = penalize_extra_chains + self.chem_mapping = chem_mapping + self._sc_lddt = None + self._oligo_lddt = None + self._weighted_lddt = None + self._lddt_ref = None + self._lddt_mdl = None + self._oligo_lddt_scorer = None + self._mapped_lddt_scorers = None + self._ref_scorers = None + self._model_penalty = None + + @property + def oligo_lddt(self): + """Oligomeric lDDT score. + + The score is computed as conserved contacts divided by the total contacts + in the reference using the :attr:`oligo_lddt_scorer`, which uses the full + complex as reference/model structure. 
If :attr:`penalize_extra_chains` is + True, the reference/model complexes contain all chains (otherwise only the + mapped ones) and additional contacts are added to the reference's total + contacts for unmapped model chains according to the :attr:`chem_mapping`. + + The main difference with :attr:`weighted_lddt` is that the lDDT scorer + "sees" the full complex here (incl. inter-chain contacts), while the + weighted single chain score looks at each chain separately. + + :getter: Computed on first use (cached) + :type: :class:`float` + """ + if self._oligo_lddt is None: + LogInfo('Reference %s has: %s chains' \ + % (self.ref.GetName(), self.ref.chain_count)) + LogInfo('Model %s has: %s chains' \ + % (self.mdl.GetName(), self.mdl.chain_count)) + + # score with or w/o extra-chain penalty + if self.penalize_extra_chains: + denominator = self.oligo_lddt_scorer.total_contacts + denominator += self._GetModelPenalty() + if denominator > 0: + oligo_lddt = self.oligo_lddt_scorer.conserved_contacts \ + / float(denominator) + else: + oligo_lddt = 0.0 + else: + oligo_lddt = self.oligo_lddt_scorer.global_score + self._oligo_lddt = oligo_lddt + return self._oligo_lddt + + @property + def weighted_lddt(self): + """Weighted average of single chain lDDT scores. + + The score is computed as a weighted average of single chain lDDT scores + (see :attr:`sc_lddt_scorers`) using the total contacts of each single + reference chain as weights. If :attr:`penalize_extra_chains` is True, + unmapped chains are added with a 0 score and total contacts taken from + the actual reference chains or (for unmapped model chains) using the + :attr:`chem_mapping`. + + See :attr:`oligo_lddt` for a comparison of the two scores. 
+ + :getter: Computed on first use (cached) + :type: :class:`float` + """ + if self._weighted_lddt is None: + scores = [s.global_score for s in self.sc_lddt_scorers] + weights = [s.total_contacts for s in self.sc_lddt_scorers] + nominator = sum([s * w for s, w in zip(scores, weights)]) + if self.penalize_extra_chains: + ref_scorers = self._GetRefScorers() + denominator = sum(s.total_contacts for s in ref_scorers.values()) + denominator += self._GetModelPenalty() + else: + denominator = sum(weights) + if denominator > 0: + self._weighted_lddt = nominator / float(denominator) + else: + self._weighted_lddt = 0.0 + return self._weighted_lddt + + @property + def lddt_ref(self): + """The reference entity used for oligomeric lDDT scoring + (:attr:`oligo_lddt` / :attr:`oligo_lddt_scorer`). + + Since the lDDT computation requires a single chain with mapped residue + numbering, all chains of :attr:`ref` are appended into a single chain X with + unique residue numbers according to the column-index in the alignment. The + alignments are in the same order as they appear in :attr:`alignments`. + Additional residues are appended at the end of the chain with unique residue + numbers. Unmapped chains are only added if :attr:`penalize_extra_chains` is + True. Only CA atoms are considered if :attr:`calpha_only` is True. + + :getter: Computed on first use (cached) + :type: :class:`~ost.mol.EntityHandle` + """ + if self._lddt_ref is None: + self._PrepareOligoEntities() + return self._lddt_ref + + @property + def lddt_mdl(self): + """The model entity used for oligomeric lDDT scoring + (:attr:`oligo_lddt` / :attr:`oligo_lddt_scorer`). + + Like :attr:`lddt_ref`, this is a single chain X containing all chains of + :attr:`mdl`. The residue numbers match the ones in :attr:`lddt_ref` where + aligned and have unique numbers for additional residues. 
+ + :getter: Computed on first use (cached) + :type: :class:`~ost.mol.EntityHandle` + """ + if self._lddt_mdl is None: + self._PrepareOligoEntities() + return self._lddt_mdl + + @property + def oligo_lddt_scorer(self): + """lDDT Scorer object for :attr:`lddt_ref` and :attr:`lddt_mdl`. + + :getter: Computed on first use (cached) + :type: :class:`~ost.mol.alg.lDDTScorer` + """ + if self._oligo_lddt_scorer is None: + self._oligo_lddt_scorer = lDDTScorer( + references=[self.lddt_ref.Select("")], + model=self.lddt_mdl.Select(""), + settings=self.settings) + return self._oligo_lddt_scorer + + @property + def mapped_lddt_scorers(self): + """List of scorer objects for each chain mapped in :attr:`alignments`. + + :getter: Computed on first use (cached) + :type: :class:`list` of :class:`MappedLDDTScorer` + """ + if self._mapped_lddt_scorers is None: + self._mapped_lddt_scorers = list() + for aln in self.alignments: + mapped_lddt_scorer = MappedLDDTScorer(aln, self.calpha_only, + self.settings) + self._mapped_lddt_scorers.append(mapped_lddt_scorer) + return self._mapped_lddt_scorers + + @property + def sc_lddt_scorers(self): + """List of lDDT scorer objects extracted from :attr:`mapped_lddt_scorers`. + + :type: :class:`list` of :class:`~ost.mol.alg.lDDTScorer` + """ + return [mls.lddt_scorer for mls in self.mapped_lddt_scorers] + + @property + def sc_lddt(self): + """List of global scores extracted from :attr:`sc_lddt_scorers`. + + If scoring for a mapped chain fails, an error is displayed and a score of 0 + is assigned. 
+ + :getter: Computed on first use (cached) + :type: :class:`list` of :class:`float` + """ + if self._sc_lddt is None: + self._sc_lddt = list() + for lddt_scorer in self.sc_lddt_scorers: + try: + self._sc_lddt.append(lddt_scorer.global_score) + except Exception as ex: + LogError('Single chain lDDT failed:', str(ex)) + self._sc_lddt.append(0.0) + return self._sc_lddt + + ############################################################################## + # Class internal helpers + ############################################################################## + + def _PrepareOligoEntities(self): + # simple wrapper to avoid code duplication + self._lddt_ref, self._lddt_mdl = _MergeAlignedChains( + self.alignments, self.ref, self.mdl, self.calpha_only, + self.penalize_extra_chains) + + @staticmethod + def _GetUnmappedMdlChains(mdl, alignments): + # assume model is second sequence in alignment and is named by chain + mdl_chains = set(ch.name for ch in mdl.chains) + mapped_mdl_chains = set(aln.GetSequence(1).GetName() for aln in alignments) + return (mdl_chains - mapped_mdl_chains) + + def _GetRefScorers(self): + # single chain lddt scorers for each reference chain (key = chain name) + if self._ref_scorers is None: + # collect from mapped_lddt_scorers + ref_scorers = dict() + for mapped_lddt_scorer in self.mapped_lddt_scorers: + ref_ch_name = mapped_lddt_scorer.reference_chain_name + ref_scorers[ref_ch_name] = mapped_lddt_scorer.lddt_scorer + # add new ones where needed + for ch in self.ref.chains: + if ch.name not in ref_scorers: + if self.calpha_only: + ref_chain = ch.Select('aname=CA') + else: + ref_chain = ch.Select('') + ref_scorers[ch.name] = lDDTScorer( + references=[ref_chain], + model=ref_chain, + settings=self.settings) + # store in cache + self._ref_scorers = ref_scorers + # fetch from cache + return self._ref_scorers + + def _GetModelPenalty(self): + # extra value to add to total number of distances for extra model chains + # -> estimated from chem-mapped 
reference chains + if self._model_penalty is None: + # sanity check + if self.chem_mapping is None: + raise RuntimeError("Must provide chem_mapping when requesting penalty " + "for extra model chains!") + # get cached ref_scorers + ref_scorers = self._GetRefScorers() + # get unmapped model chains + unmapped_mdl_chains = self._GetUnmappedMdlChains(self.mdl, self.alignments) + # map extra chains to ref. chains + model_penalty = 0 + for ch_name_mdl in sorted(unmapped_mdl_chains): + # get penalty for chain + cur_penalty = None + for cm_ref, cm_mdl in self.chem_mapping.iteritems(): + if ch_name_mdl in cm_mdl: + # penalize by an average of the chem. mapped ref. chains + cur_penalty = 0 + for ch_name_ref in cm_ref: + # assumes that total_contacts is cached (for speed) + cur_penalty += ref_scorers[ch_name_ref].total_contacts + cur_penalty /= float(len(cm_ref)) + break + # report penalty + if cur_penalty is None: + LogWarning('Extra MODEL chain %s could not be chemically mapped to ' + 'any chain in REFERENCE, lDDT cannot consider it!' \ + % ch_name_mdl) + else: + LogScript('Extra MODEL chain %s added to lDDT score by considering ' + 'chemically mapped chains in REFERENCE.' % ch_name_mdl) + model_penalty += cur_penalty + # store in cache + self._model_penalty = model_penalty + # fetch from cache + return self._model_penalty + + +class MappedLDDTScorer(object): + """A simple class to calculate a single-chain lDDT score on a given chain to + chain mapping as extracted from :class:`OligoLDDTScorer`. + + :param alignment: Sets :attr:`alignment` + :param calpha_only: Sets :attr:`calpha_only` + :param settings: Sets :attr:`settings` + + .. attribute:: alignment + + Alignment with two sequences named according to the mapped chains and with + views attached to both sequences (e.g. one of the items of + :attr:`QSscorer.alignments`). + + The first sequence is assumed to be the reference and the second one the + model. 
Since the lDDT score is not symmetric (extra residues in model are + ignored), the order is important. + + :type: :class:`~ost.seq.AlignmentHandle` + + .. attribute:: calpha_only + + If True, restricts lDDT score to CA only. + + :type: :class:`bool` + + .. attribute:: settings + + Settings to use for lDDT scoring. + + :type: :class:`~ost.mol.alg.lDDTSettings` + + .. attribute:: lddt_scorer + + lDDT Scorer object for the given chains. + + :type: :class:`~ost.mol.alg.lDDTScorer` + + .. attribute:: reference_chain_name + + Chain name of the reference. + + :type: :class:`str` + + .. attribute:: model_chain_name + + Chain name of the model. + + :type: :class:`str` + """ + def __init__(self, alignment, calpha_only, settings): + # prepare fields + self.alignment = alignment + self.calpha_only = calpha_only + self.settings = settings + self.lddt_scorer = None # set in _InitScorer + self.reference_chain_name = alignment.sequences[0].name + self.model_chain_name = alignment.sequences[1].name + self._old_number_label = "old_num" + self._extended_alignment = None # set in _InitScorer + # initialize lDDT scorer + self._InitScorer() + + def GetPerResidueScores(self): + """ + :return: Scores for each residue + :rtype: :class:`list` of :class:`dict` with one item for each residue + existing in model and reference: + + - "residue_number": Residue number in reference chain + - "residue_name": Residue name in reference chain + - "lddt": local lDDT + - "conserved_contacts": number of conserved contacts + - "total_contacts": total number of contacts + """ + scores = list() + assigned_residues = list() + # Make sure the score is calculated + self.lddt_scorer.global_score + for col in self._extended_alignment: + if col[0] != "-" and col.GetResidue(3).IsValid(): + ref_res = col.GetResidue(0) + mdl_res = col.GetResidue(1) + ref_res_renum = col.GetResidue(2) + mdl_res_renum = col.GetResidue(3) + if ref_res.one_letter_code != ref_res_renum.one_letter_code: + raise RuntimeError("Reference 
residue name mapping inconsistent: %s != %s" % + (ref_res.one_letter_code, + ref_res_renum.one_letter_code)) + if mdl_res.one_letter_code != mdl_res_renum.one_letter_code: + raise RuntimeError("Model residue name mapping inconsistent: %s != %s" % + (mdl_res.one_letter_code, + mdl_res_renum.one_letter_code)) + if ref_res.GetNumber().num != ref_res_renum.GetIntProp(self._old_number_label): + raise RuntimeError("Reference residue number mapping inconsistent: %s != %s" % + (ref_res.GetNumber().num, + ref_res_renum.GetIntProp(self._old_number_label))) + if mdl_res.GetNumber().num != mdl_res_renum.GetIntProp(self._old_number_label): + raise RuntimeError("Model residue number mapping inconsistent: %s != %s" % + (mdl_res.GetNumber().num, + mdl_res_renum.GetIntProp(self._old_number_label))) + if ref_res.qualified_name in assigned_residues: + raise RuntimeError("Duplicated residue in reference: %s" % + (ref_res.qualified_name)) + else: + assigned_residues.append(ref_res.qualified_name) + # check if property there (may be missing for CA-only) + if mdl_res_renum.HasProp(self.settings.label): + scores.append({ + "residue_number": ref_res.GetNumber().num, + "residue_name": ref_res.name, + "lddt": mdl_res_renum.GetFloatProp(self.settings.label), + "conserved_contacts": mdl_res_renum.GetFloatProp(self.settings.label + "_conserved"), + "total_contacts": mdl_res_renum.GetFloatProp(self.settings.label + "_total")}) + return scores + + ############################################################################## + # Class internal helpers (anything that doesn't easily work without this class) + ############################################################################## + + def _InitScorer(self): + # Use copy of alignment (extended by 2 extra sequences for renumbering) + aln = self.alignment.Copy() + # Get chains and renumber according to alignment (for lDDT) + reference = Renumber( + aln.GetSequence(0), + old_number_label=self._old_number_label).CreateFullView() + refseq = 
seq.CreateSequence( + "reference_renumbered", + aln.GetSequence(0).GetString()) + refseq.AttachView(reference) + aln.AddSequence(refseq) + model = Renumber( + aln.GetSequence(1), + old_number_label=self._old_number_label).CreateFullView() + modelseq = seq.CreateSequence( + "model_renumbered", + aln.GetSequence(1).GetString()) + modelseq.AttachView(model) + aln.AddSequence(modelseq) + # Filter to CA-only if desired (done after AttachView to not mess it up) + if self.calpha_only: + self.lddt_scorer = lDDTScorer( + references=[reference.Select('aname=CA')], + model=model.Select('aname=CA'), + settings=self.settings) + else: + self.lddt_scorer = lDDTScorer( + references=[reference], + model=model, + settings=self.settings) + # Store alignment for later + self._extended_alignment = aln ############################################################################### # HELPERS @@ -909,7 +1433,7 @@ def _AlignAtomSeqs(seq_1, seq_2): """ :type seq_1: :class:`ost.seq.SequenceHandle` :type seq_2: :class:`ost.seq.SequenceHandle` - :return: Alignment of two sequences using a global aignment. Views attached + :return: Alignment of two sequences using a global alignment. Views attached to the input sequences will remain attached in the aln. :rtype: :class:`~ost.seq.AlignmentHandle` or None if it failed. """ @@ -924,6 +1448,28 @@ def _AlignAtomSeqs(seq_1, seq_2): LogWarning('%s: %s' % (seq_2.name, seq_2.string)) return aln +def _FixSelectChainName(ch_name): + """ + :return: String to be used with Select(cname=<RETURN>). Takes care of putting + quotation marks where needed. + :rtype: :class:`str` + :param ch_name: Single chain name (:class:`str`). + """ + if ch_name in ['-', '_', ' ']: + return '"%c"' % ch_name + else: + return ch_name + +def _FixSelectChainNames(ch_names): + """ + :return: String to be used with Select(cname=<RETURN>). Takes care of joining + and putting quotation marks where needed. 
+ :rtype: :class:`str` + :param ch_names: Some iterable list of chain names (:class:`str` items). + """ + chain_set = set([_FixSelectChainName(ch_name) for ch_name in ch_names]) + return ','.join(chain_set) + # QS entity def _CleanInputEntity(ent): @@ -947,13 +1493,7 @@ def _CleanInputEntity(ent): # remove them from *ent* if removed_chains: - chain_set = set() - for ch_name in removed_chains: - if ch_name in ['-', '_', ' ']: - chain_set.add('"%c"' % ch_name) - else: - chain_set.add(ch_name) - view = ent.Select('cname!=%s' % ','.join(chain_set)) + view = ent.Select('cname!=%s' % _FixSelectChainNames(removed_chains)) ent_new = mol.CreateEntityFromView(view, True) ent_new.SetName(ent.GetName()) else: @@ -961,7 +1501,7 @@ def _CleanInputEntity(ent): # check if CA only calpha_only = False - if ent_new.Select('aname=CB').atom_count == 0: + if ent_new.atom_count > 0 and ent_new.Select('aname=CB').atom_count == 0: LogInfo('Structure %s is a CA only structure!' % ent_new.GetName()) calpha_only = True @@ -1130,8 +1670,8 @@ def _GetChemGroupsMapping(qs_ent_1, qs_ent_2): # check if we have any chains left LogInfo('Chemical chain-groups mapping: ' + str(chem_mapping)) - if len(mapped_1) < 2 or len(mapped_2) < 2: - raise QSscoreError('Less than 2 chains left in chem_mapping.') + if len(mapped_1) < 1 or len(mapped_2) < 1: + raise QSscoreError('Less than 1 chains left in chem_mapping.') return chem_mapping def _SelectFew(l, max_elements): @@ -1160,6 +1700,13 @@ def _GetAlignedResidues(qs_ent_1, qs_ent_2, chem_mapping, max_ca_per_chain, :param chem_mapping: See :attr:`QSscorer.chem_mapping` :param max_ca_per_chain: See :attr:`QSscorer.max_ca_per_chain_for_cm` """ + # make sure name doesn't contain spaces and is unique + def _FixName(seq_name, seq_names): + # get rid of spaces and make it unique + seq_name = seq_name.replace(' ', '-') + while seq_name in seq_names: + seq_name += '-' + return seq_name # resulting views into CA entities using CA chain sequences ent_view_1 = 
qs_ent_1.ca_entity.CreateEmptyView() ent_view_2 = qs_ent_2.ca_entity.CreateEmptyView() @@ -1173,12 +1720,12 @@ def _GetAlignedResidues(qs_ent_1, qs_ent_2, chem_mapping, max_ca_per_chain, seq_to_empty_view = dict() for ch in group_1: sequence = ca_chains_1[ch].Copy() - sequence.name = qs_ent_1.GetName() + '.' + ch + sequence.name = _FixName(qs_ent_1.GetName() + '.' + ch, seq_to_empty_view) seq_to_empty_view[sequence.name] = ent_view_1 seq_list.AddSequence(sequence) for ch in group_2: sequence = ca_chains_2[ch].Copy() - sequence.name = qs_ent_2.GetName() + '.' + ch + sequence.name = _FixName(qs_ent_2.GetName() + '.' + ch, seq_to_empty_view) seq_to_empty_view[sequence.name] = ent_view_2 seq_list.AddSequence(sequence) alnc = ClustalW(seq_list, clustalw=clustalw_bin) @@ -1248,8 +1795,8 @@ def _FindSymmetry(qs_ent_1, qs_ent_2, ent_to_cm_1, ent_to_cm_2, chem_mapping): for _, symm_1, symm_2 in sorted(best_symm): s1 = symm_1[0] s2 = symm_2[0] - group_1 = ent_to_cm_1.Select('cname=%s' % ','.join(s1)) - group_2 = ent_to_cm_2.Select('cname=%s' % ','.join(s2)) + group_1 = ent_to_cm_1.Select('cname=%s' % _FixSelectChainNames(s1)) + group_2 = ent_to_cm_2.Select('cname=%s' % _FixSelectChainNames(s2)) # check if by superposing a pair of chains within the symmetry group to # superpose all chains within the symmetry group # -> if successful, the symmetry groups are compatible @@ -1266,7 +1813,9 @@ def _FindSymmetry(qs_ent_1, qs_ent_2, ent_to_cm_1, ent_to_cm_2, chem_mapping): def _GetChainMapping(ent_1, ent_2, symm_1, symm_2, chem_mapping): """ - :return: Mapping from *ent_1* to *ent_2* (see :attr:`QSscorer.chain_mapping`) + :return: Tuple with mapping from *ent_1* to *ent_2* (see + :attr:`QSscorer.chain_mapping`) and scheme used (see + :attr:`QSscorer.chain_mapping_scheme`) :param ent_1: See :attr:`QSscorer.ent_to_cm_1` :param ent_2: See :attr:`QSscorer.ent_to_cm_2` @@ -1292,7 +1841,7 @@ def _GetChainMapping(ent_1, ent_2, symm_1, symm_2, chem_mapping): if scheme == 'permissive': 
LogWarning('Permissive thresholds used for overlap based mapping ' + \ 'detection: check mapping manually: %s' % mapping) - return mapping + return mapping, scheme # NOTE that what follows below is sub-optimal: # - if the two structures don't fit at all, we may map chains rather randomly @@ -1392,7 +1941,7 @@ def _GetChainMapping(ent_1, ent_2, symm_1, symm_2, chem_mapping): LogWarning('Extensive search used for mapping detection (fallback). This ' + \ 'approach has known limitations. Check mapping manually: %s' \ % mapping) - return mapping + return mapping, 'extensive' def _GetSymmetrySubgroups(qs_ent, ent, chem_groups): @@ -1628,7 +2177,8 @@ def _GetClosestChainInterface(ent, ref_chain, chains): # inaccurate. Also it could be extracted from QSscoreEntity.contacts. closest = [] for ch in chains: - iface_view = ent.Select('cname=%s and 10 <> [cname=%s]' % (ref_chain, ch)) + iface_view = ent.Select('cname="%s" and 10 <> [cname="%s"]' \ + % (ref_chain, ch)) nr_res = iface_view.residue_count closest.append((nr_res, ch)) closest_chain = max(closest)[1] @@ -1820,7 +2370,7 @@ def _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2, chem_mapping, overlapped for overlap to be sufficient. :type sup_fract: :class:`float` :param find_best: If True, we look for best mapping according to - :func:`_ChainRMSD`. Otherwise, we return first suitable + :func:`_GetMappedRMSD`. Otherwise, we return first suitable mapping. :type find_best: :class:`bool` @@ -1840,8 +2390,8 @@ def _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2, chem_mapping, # to superpose the full oligomer (e.g. 
if some chains are open/closed) for c1, c2 in itertools.product(g1, g2): # get superposition transformation - chain_1 = ent_1.Select('cname=%s' % c1) - chain_2 = ent_2.Select('cname=%s' % c2) + chain_1 = ent_1.Select('cname="%s"' % c1) + chain_2 = ent_2.Select('cname="%s"' % c2) res = mol.alg.SuperposeSVD(chain_1, chain_2, apply_transform=False) # look for overlaps mapping = _GetSuperpositionMapping(ent_1, ent_2, chem_mapping, @@ -1945,8 +2495,8 @@ def _GetMappedRMSD(ent_1, ent_2, chain_mapping, transformation): atoms = [] for c1, c2 in chain_mapping.iteritems(): # get views and atom counts - chain_1 = ent_1.Select('cname=%s' % c1) - chain_2 = ent_2.Select('cname=%s' % c2) + chain_1 = ent_1.Select('cname="%s"' % c1) + chain_2 = ent_2.Select('cname="%s"' % c2) atom_count = chain_1.atom_count if atom_count != chain_2.atom_count: raise RuntimeError('Chains in _GetMappedRMSD must be perfectly aligned!') @@ -1978,13 +2528,13 @@ class _CachedRMSD: def GetChainView1(self, cname): """Get cached view on chain *cname* for :attr:`ent_1`.""" if cname not in self._chain_views_1: - self._chain_views_1[cname] = self.ent_1.Select('cname=%s' % cname) + self._chain_views_1[cname] = self.ent_1.Select('cname="%s"' % cname) return self._chain_views_1[cname] def GetChainView2(self, cname): """Get cached view on chain *cname* for :attr:`ent_2`.""" if cname not in self._chain_views_2: - self._chain_views_2[cname] = self.ent_2.Select('cname=%s' % cname) + self._chain_views_2[cname] = self.ent_2.Select('cname="%s"' % cname) return self._chain_views_2[cname] def GetSuperposition(self, c1, c2): @@ -2087,7 +2637,7 @@ def _AreValidSymmetries(symm_1, symm_2): return False return True -def _GetMappedAlignments(ent_1, ent_2, chain_mapping): +def _GetMappedAlignments(ent_1, ent_2, chain_mapping, res_num_alignment): """ :return: Alignments of 2 structures given chain mapping (see :attr:`QSscorer.alignments`). 
@@ -2096,17 +2646,39 @@ def _GetMappedAlignments(ent_1, ent_2, chain_mapping): :param ent_2: Entity containing all chains in *chain_mapping.values()*. Views to this entity attached to second sequence of each aln. :param chain_mapping: See :attr:`QSscorer.chain_mapping` + :param res_num_alignment: See :attr:`QSscorer.res_num_alignment` """ - alns = [] + alns = list() for ch_1_name in sorted(chain_mapping): # get both sequences incl. attached view ch_1 = ent_1.FindChain(ch_1_name) - seq_1 = seq.SequenceFromChain(ch_1.name, ch_1) ch_2 = ent_2.FindChain(chain_mapping[ch_1_name]) - seq_2 = seq.SequenceFromChain(ch_2.name, ch_2) - # align them - aln = _AlignAtomSeqs(seq_1, seq_2) - if aln: alns.append(aln) + if res_num_alignment: + max_res_num = max([r.number.GetNum() for r in ch_1.residues] + + [r.number.GetNum() for r in ch_2.residues]) + ch1_aln = ["-"] * max_res_num + ch2_aln = ["-"] * max_res_num + for res in ch_1.residues: + ch1_aln[res.number.GetNum() - 1] = res.GetOneLetterCode() + ch1_aln = "".join(ch1_aln) + seq_1 = seq.CreateSequence(ch_1.name, str(ch1_aln)) + seq_1.AttachView(ch_1.Select("")) + for res in ch_2.residues: + ch2_aln[res.number.GetNum() - 1] = res.GetOneLetterCode() + ch2_aln = "".join(ch2_aln) + seq_2 = seq.CreateSequence(ch_2.name, str(ch2_aln)) + seq_2.AttachView(ch_2.Select("")) + # Create alignment + aln = seq.CreateAlignment() + aln.AddSequence(seq_1) + aln.AddSequence(seq_2) + else: + seq_1 = seq.SequenceFromChain(ch_1.name, ch_1) + seq_2 = seq.SequenceFromChain(ch_2.name, ch_2) + # align them + aln = _AlignAtomSeqs(seq_1, seq_2) + if aln: + alns.append(aln) return alns def _GetMappedResidues(alns): @@ -2301,7 +2873,7 @@ def _AddResidue(edi, res, rnum, chain, calpha_only): for atom in res.atoms: edi.InsertAtom(new_res, atom.name, atom.pos) -def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only): +def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only, penalize_extra_chains): """ Create two new entities (based on the alignments attached 
views) where all residues have same numbering (when they're aligned) and they are all pushed to @@ -2321,6 +2893,9 @@ def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only): :type ent_2: :class:`~ost.mol.EntityHandle` :param calpha_only: If True, we only include CA atoms instead of all. :type calpha_only: :class:`bool` + :param penalize_extra_chains: If True, extra chains are added to model and + reference. Otherwise, only mapped ones. + :type penalize_extra_chains: :class:`bool` :return: Tuple of two single chain entities (from *ent_1* and from *ent_2*) :rtype: :class:`tuple` of :class:`~ost.mol.EntityHandle` @@ -2349,19 +2924,20 @@ def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only): res_2 = col.GetResidue(1) if res_2.IsValid(): _AddResidue(ed_2, res_2, rnum, new_chain_2, calpha_only) - # extra chains - for chain in ent_1.chains: - if chain.name in chain_done_1: - continue - for res in chain.residues: - rnum += 1 - _AddResidue(ed_1, res, rnum, new_chain_1, calpha_only) - for chain in ent_2.chains: - if chain.name in chain_done_2: - continue - for res in chain.residues: - rnum += 1 - _AddResidue(ed_2, res, rnum, new_chain_2, calpha_only) + # extra chains? + if penalize_extra_chains: + for chain in ent_1.chains: + if chain.name in chain_done_1: + continue + for res in chain.residues: + rnum += 1 + _AddResidue(ed_1, res, rnum, new_chain_1, calpha_only) + for chain in ent_2.chains: + if chain.name in chain_done_2: + continue + for res in chain.residues: + rnum += 1 + _AddResidue(ed_2, res, rnum, new_chain_2, calpha_only) # get entity names ent_ren_1.SetName(aln.GetSequence(0).GetAttachedView().GetName()) ent_ren_2.SetName(aln.GetSequence(1).GetAttachedView().GetName()) @@ -2375,30 +2951,7 @@ def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only): ed_2.UpdateICS() return ent_ren_1, ent_ren_2 -def _ComputeLDDTScore(ref, mdl): - """ - :return: lDDT of *mdl* vs *ref* (see :attr:`QSscorer.lddt_score`). 
- :param mdl: Reference entity (see :attr:`QSscorer.lddt_mdl`) - :param ref: Model entity (see :attr:`QSscorer.lddt_ref`) - """ - # check input - LogInfo('Reference %s has: %s residues' % (ref.GetName(), ref.residue_count)) - LogInfo('Model %s has: %s residues' % (mdl.GetName(), mdl.residue_count)) - # get lddt score with fixed settings - lddt_score = mol.alg.LocalDistDiffTest(mdl.Select(''), ref.Select(''), - 2., 8., 'lddt') - LogInfo('lDDT score: %.3f' % lddt_score) - # add lDDT as B-factor to model - for r in mdl.residues: - if r.HasProp('lddt'): - for a in r.atoms: - a.SetBFactor(r.GetFloatProp('lddt')) - else: - for a in r.atoms: - a.SetBFactor(0.0) - - return lddt_score # specify public interface __all__ = ('QSscoreError', 'QSscorer', 'QSscoreEntity', 'FilterContacts', - 'GetContacts') + 'GetContacts', 'OligoLDDTScorer', 'MappedLDDTScorer') diff --git a/modules/mol/alg/pymod/wrap_mol_alg.cc b/modules/mol/alg/pymod/wrap_mol_alg.cc index 9f6c7f850fcaa869be30b337b1c94c654e5d2927..1c42af212a6b849d977d2afb4f43923bd042ed56 100644 --- a/modules/mol/alg/pymod/wrap_mol_alg.cc +++ b/modules/mol/alg/pymod/wrap_mol_alg.cc @@ -18,7 +18,9 @@ //------------------------------------------------------------------------------ #include <boost/python.hpp> +#include <boost/python/raw_function.hpp> #include <boost/python/suite/indexing/map_indexing_suite.hpp> +#include <ost/log.hh> #include <ost/config.hh> #include <ost/mol/alg/local_dist_diff_test.hh> #include <ost/mol/alg/distance_test_common.hh> @@ -39,9 +41,12 @@ void export_svdSuperPose(); void export_TrajectoryAnalysis(); void export_StructureAnalysis(); void export_Clash(); +void export_NonStandard(); +void export_Molck(); void export_contact_overlap(); void export_accessibility(); void export_sec_struct(); +void export_find_membrane(); #if OST_IMG_ENABLED void export_entity_to_density(); #endif @@ -50,6 +55,7 @@ namespace { std::pair<long int,long int> (*lddt_a)(const mol::EntityView&, const mol::alg::GlobalRDMap& , 
std::vector<Real>, int, const String&)=&mol::alg::LocalDistDiffTest; Real (*lddt_c)(const mol::EntityView&, const mol::EntityView& , Real, Real, const String&)=&mol::alg::LocalDistDiffTest; +// Real (*lddt_d)(const mol::EntityView&, std::vector<mol::EntityView>&, const mol::alg::GlobalRDMap&, mol::alg::lDDTSettings&)=&mol::alg::LocalDistDiffTest; Real (*lddt_b)(const seq::AlignmentHandle&,Real, Real, int, int)=&mol::alg::LocalDistDiffTest; std::pair<mol::EntityView,mol::alg::ClashingInfo> (*fc_a)(const mol::EntityView&, const mol::alg::ClashingDistances&,bool)=&mol::alg::FilterClashes; std::pair<mol::EntityView,mol::alg::ClashingInfo> (*fc_b)(const mol::EntityHandle&, const mol::alg::ClashingDistances&, bool)=&mol::alg::FilterClashes; @@ -58,6 +64,18 @@ std::pair<mol::EntityView,mol::alg::StereoChemistryInfo> (*csc_b)(const mol::Ent mol::CoordGroupHandle (*superpose_frames1)(mol::CoordGroupHandle&, mol::EntityView&, int, int, int)=&mol::alg::SuperposeFrames; mol::CoordGroupHandle (*superpose_frames2)(mol::CoordGroupHandle&, mol::EntityView&, mol::EntityView&, int, int)=&mol::alg::SuperposeFrames; + +Real lddt_d(const mol::EntityView& model, list& reference_list, const mol::alg::GlobalRDMap& distance_list, mol::alg::lDDTSettings& settings){ + int reference_list_length = boost::python::extract<int>(reference_list.attr("__len__")()); + std::vector<ost::mol::EntityView> reference_list_vector(reference_list_length); + + for (int i=0; i<reference_list_length; i++) { + reference_list_vector[i] = boost::python::extract<ost::mol::EntityView>(reference_list[i]); + } + + return LocalDistDiffTest(model, reference_list_vector, distance_list, settings); +} + ost::mol::alg::StereoChemicalParams fill_stereochemical_params_wrapper (const String& header, const list& stereo_chemical_props_file) { int stereo_chemical_props_file_length = boost::python::extract<int>(stereo_chemical_props_file.attr("__len__")()); @@ -82,13 +100,13 @@ ost::mol::alg::ClashingDistances 
fill_clashing_distances_wrapper (const list& st return ost::mol::alg::FillClashingDistances(stereo_chemical_props_file_vector); } -ost::mol::alg::GlobalRDMap create_distance_list_from_multiple_references(const list& ref_list, const list& cutoff_list, int sequence_separation, Real max_dist) +ost::mol::alg::GlobalRDMap create_distance_list_from_multiple_references(const list& reference_list, const list& cutoff_list, int sequence_separation, Real max_dist) { - int ref_list_length = boost::python::extract<int>(ref_list.attr("__len__")()); - std::vector<ost::mol::EntityView> ref_list_vector(ref_list_length); + int reference_list_length = boost::python::extract<int>(reference_list.attr("__len__")()); + std::vector<ost::mol::EntityView> reference_list_vector(reference_list_length); - for (int i=0; i<ref_list_length; i++) { - ref_list_vector[i] = boost::python::extract<ost::mol::EntityView>(ref_list[i]); + for (int i=0; i<reference_list_length; i++) { + reference_list_vector[i] = boost::python::extract<ost::mol::EntityView>(reference_list[i]); } int cutoff_list_length = boost::python::extract<int>(cutoff_list.attr("__len__")()); @@ -97,10 +115,186 @@ ost::mol::alg::GlobalRDMap create_distance_list_from_multiple_references(const l for (int i=0; i<cutoff_list_length; i++) { cutoff_list_vector[i] = boost::python::extract<Real>(cutoff_list[i]); } - return ost::mol::alg::CreateDistanceListFromMultipleReferences(ref_list_vector, cutoff_list_vector, sequence_separation, max_dist); + return ost::mol::alg::CreateDistanceListFromMultipleReferences(reference_list_vector, cutoff_list_vector, sequence_separation, max_dist); +} + +object lDDTSettingsInitWrapper(tuple args, dict kwargs){ + + object self = args[0]; + args = tuple(args.slice(1,_)); + + Real radius = 15.0; + if(kwargs.contains("radius")){ + radius = extract<Real>(kwargs["radius"]); + kwargs["radius"].del(); + } + + int sequence_separation = 0; + if(kwargs.contains("sequence_separation")){ + sequence_separation = 
extract<int>(kwargs["sequence_separation"]); + kwargs["sequence_separation"].del(); + } + + std::vector<Real> cutoffs; + if(kwargs.contains("cutoffs")){ + list cutoff_list = extract<list>(kwargs["cutoffs"]); + int cutoff_list_length = boost::python::extract<int>(cutoff_list.attr("__len__")()); + for (int i=0; i<cutoff_list_length; i++) { + cutoffs.push_back(boost::python::extract<Real>(cutoff_list[i])); + } + kwargs["cutoffs"].del(); + } else { + cutoffs.push_back(0.5); + cutoffs.push_back(1.0); + cutoffs.push_back(2.0); + cutoffs.push_back(4.0); + } + + String label = "locallddt"; + if(kwargs.contains("label")){ + label = extract<String>(kwargs["label"]); + kwargs["label"].del(); + } + + if(len(kwargs) > 0){ + std::stringstream ss; + ss << "Invalid keywords observed when setting up lDDTSettings! "; + ss << "Or did you pass the same keyword twice? "; + ss << "Valid keywords are: radius, "; + ss << "sequence_separation, "; + ss << "cutoffs, label!"; + throw std::invalid_argument(ss.str()); + } + + return self.attr("__init__")(radius, + sequence_separation, + cutoffs, + label); } +object lDDTScorerInitWrapper(tuple args, dict kwargs){ + + object self = args[0]; + args = tuple(args.slice(1, len(args))); + + std::vector<ost::mol::EntityView> reference_list_vector; + if(kwargs.contains("references")){ + list reference_list = boost::python::extract<list>(kwargs["references"]); + int reference_list_length = boost::python::extract<int>(reference_list.attr("__len__")()); + for (int i=0; i<reference_list_length; i++) { + reference_list_vector.push_back(boost::python::extract<ost::mol::EntityView>(reference_list[i])); + } + kwargs["references"].del(); + } else { + throw std::invalid_argument("'references' argument is required"); + } + ost::mol::EntityView model; + if(kwargs.contains("model")){ + model = boost::python::extract<ost::mol::EntityView>(kwargs["model"]); + kwargs["model"].del(); + } else { + throw std::invalid_argument("'model' argument is 
required"); + } + + ost::mol::alg::lDDTSettings settings; + if(kwargs.contains("settings")){ + settings = extract<ost::mol::alg::lDDTSettings>(kwargs["settings"]); + kwargs["settings"].del(); + } else { + throw std::invalid_argument("'settings' argument is required"); + } + + if(len(kwargs) > 0){ + std::stringstream ss; + ss << "Invalid keywords observed when setting up lDDTScorer! "; + ss << "Or did you pass the same keyword twice? "; + ss << "Valid keywords are: references, model and settings!"; + throw std::invalid_argument(ss.str()); + } + + return self.attr("__init__")(reference_list_vector, + model, + settings); +} + + +void clean_lddt_references_wrapper(const list& reference_list) +{ + int reference_list_length = boost::python::extract<int>(reference_list.attr("__len__")()); + std::vector<ost::mol::EntityView> reference_list_vector(reference_list_length); + + for (int i=0; i<reference_list_length; i++) { + reference_list_vector[i] = boost::python::extract<ost::mol::EntityView>(reference_list[i]); + } + + return ost::mol::alg::CleanlDDTReferences(reference_list_vector); +} + +ost::mol::alg::GlobalRDMap prepare_lddt_global_rdmap_wrapper(const list& reference_list, + list& cutoff_list, + int sequence_separation, + Real max_dist) +{ + int reference_list_length = boost::python::extract<int>(reference_list.attr("__len__")()); + std::vector<ost::mol::EntityView> reference_list_vector(reference_list_length); + + for (int i=0; i<reference_list_length; i++) { + reference_list_vector[i] = boost::python::extract<ost::mol::EntityView>(reference_list[i]); + } + + int cutoff_list_length = boost::python::extract<int>(cutoff_list.attr("__len__")()); + std::vector<Real> cutoff_list_vector(cutoff_list_length); + + for (int i=0; i<cutoff_list_length; i++) { + cutoff_list_vector[i] = boost::python::extract<Real>(cutoff_list[i]); + } + + return mol::alg::PreparelDDTGlobalRDMap(reference_list_vector, cutoff_list_vector, sequence_separation, max_dist); +} + +list 
get_lddt_per_residue_stats_wrapper(mol::EntityView& model, + ost::mol::alg::GlobalRDMap& distance_list, + bool structural_checks, + String label) { + std::vector<mol::alg::lDDTLocalScore> scores = GetlDDTPerResidueStats(model, distance_list, structural_checks, label); + list local_scores_list; + for (std::vector<mol::alg::lDDTLocalScore>::const_iterator sit = scores.begin(); sit != scores.end(); ++sit) { + local_scores_list.append(*sit); + } + return local_scores_list; +} + +list get_local_scores_wrapper(mol::alg::lDDTScorer& scorer) { + std::vector<mol::alg::lDDTLocalScore> scores = scorer.GetLocalScores(); + list local_scores_list; + for (std::vector<mol::alg::lDDTLocalScore>::const_iterator sit = scores.begin(); sit != scores.end(); ++sit) { + local_scores_list.append(*sit); + } + return local_scores_list; +} + +list get_references_wrapper(mol::alg::lDDTScorer& scorer) { + std::vector<mol::EntityView> references = scorer.GetReferences(); + list local_references_list; + for (std::vector<mol::EntityView>::const_iterator sit = references.begin(); sit != references.end(); ++sit) { + local_references_list.append(*sit); + } + return local_references_list; +} + + +void print_lddt_per_residue_stats_wrapper(list& scores, bool structural_checks, int cutoffs_size){ + int scores_length = boost::python::extract<int>(scores.attr("__len__")()); + std::vector<mol::alg::lDDTLocalScore> scores_vector(scores_length); + + for (int i=0; i<scores_length; i++) { + scores_vector[i] = boost::python::extract<mol::alg::lDDTLocalScore>(scores[i]); + } + + return mol::alg::PrintlDDTPerResidueStats(scores_vector, structural_checks, cutoffs_size); +} + } @@ -110,9 +304,12 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) export_TrajectoryAnalysis(); export_StructureAnalysis(); export_Clash(); + export_NonStandard(); + export_Molck(); export_contact_overlap(); export_accessibility(); export_sec_struct(); + export_find_membrane(); #if OST_IMG_ENABLED export_entity_to_density(); #endif @@ -120,6 +317,7 @@ 
BOOST_PYTHON_MODULE(_ost_mol_alg) def("LocalDistDiffTest", lddt_a, (arg("sequence_separation")=0,arg("local_lddt_property_string")="")); def("LocalDistDiffTest", lddt_c, (arg("local_lddt_property_string")="")); def("LocalDistDiffTest", lddt_b, (arg("ref_index")=0, arg("mdl_index")=1)); + def("LocalDistDiffTest", &lddt_d, (arg("model"), arg("reference_list"), ("distance_list"), arg("settings"))); def("FilterClashes", fc_a, (arg("ent"), arg("clashing_distances"), arg("always_remove_bb")=false)); def("FilterClashes", fc_b, (arg("ent"), arg("clashing_distances"), arg("always_remove_bb")=false)); def("CheckStereoChemistry", csc_a, (arg("ent"), arg("bonds"), arg("angles"), arg("bond_tolerance"), arg("angle_tolerance"), arg("always_remove_bb")=false)); @@ -161,13 +359,74 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) .def("GetResidueName",&mol::alg::UniqueAtomIdentifier::GetResidueName) .def("GetAtomName",&mol::alg::UniqueAtomIdentifier::GetAtomName) .def("GetQualifiedAtomName",&mol::alg::UniqueAtomIdentifier::GetQualifiedAtomName) - ; + ; + + class_<mol::alg::lDDTSettings>("lDDTSettings", no_init) + .def("__init__", raw_function(lDDTSettingsInitWrapper)) + .def(init<Real, int, std::vector<Real>&, String>()) + .def("ToString", &mol::alg::lDDTSettings::ToString) + .def("PrintParameters", &mol::alg::lDDTSettings::PrintParameters) + .def("__repr__", &mol::alg::lDDTSettings::ToString) + .def("__str__", &mol::alg::lDDTSettings::ToString) + .def_readwrite("radius", &mol::alg::lDDTSettings::radius) + .def_readwrite("sequence_separation", &mol::alg::lDDTSettings::sequence_separation) + .def_readwrite("cutoffs", &mol::alg::lDDTSettings::cutoffs) + .def_readwrite("label", &mol::alg::lDDTSettings::label); + + class_<mol::alg::lDDTLocalScore>("lDDTLocalScore", init<String, String, int, String, String, Real, int, int>()) + .def("ToString", &mol::alg::lDDTLocalScore::ToString) + .def("GetHeader", &mol::alg::lDDTLocalScore::GetHeader) + .def("__str__", &mol::alg::lDDTLocalScore::Repr) + 
.def("__repr__", &mol::alg::lDDTLocalScore::Repr) + .def_readwrite("cname", &mol::alg::lDDTLocalScore::cname) + .def_readwrite("rname", &mol::alg::lDDTLocalScore::rname) + .def_readwrite("rnum", &mol::alg::lDDTLocalScore::rnum) + .def_readwrite("is_assessed", &mol::alg::lDDTLocalScore::is_assessed) + .def_readwrite("quality_problems", &mol::alg::lDDTLocalScore::quality_problems) + .def_readwrite("local_lddt", &mol::alg::lDDTLocalScore::local_lddt) + .def_readwrite("conserved_dist", &mol::alg::lDDTLocalScore::conserved_dist) + .def_readwrite("total_dist", &mol::alg::lDDTLocalScore::total_dist); + + class_<mol::alg::lDDTScorer>("lDDTScorer", no_init) + .def("__init__", raw_function(lDDTScorerInitWrapper)) + .def(init<std::vector<mol::EntityView>&, mol::EntityView&, mol::alg::lDDTSettings&>()) + .add_property("global_score", &mol::alg::lDDTScorer::GetGlobalScore) + .add_property("conserved_contacts", &mol::alg::lDDTScorer::GetNumConservedContacts) + .add_property("total_contacts", &mol::alg::lDDTScorer::GetNumTotalContacts) + .def("PrintPerResidueStats", &mol::alg::lDDTScorer::PrintPerResidueStats) + .add_property("local_scores", &get_local_scores_wrapper) + .def_readonly("model", &mol::alg::lDDTScorer::model_view) + .add_property("references", &get_references_wrapper) + .add_property("is_valid", &mol::alg::lDDTScorer::IsValid); + + class_<mol::alg::StereoChemicalProps>("StereoChemicalProps", + init<mol::alg::StereoChemicalParams&, + mol::alg::StereoChemicalParams&, + mol::alg::ClashingDistances&>()) + .def_readwrite("bond_table", &mol::alg::StereoChemicalProps::bond_table) + .def_readwrite("angle_table", &mol::alg::StereoChemicalProps::angle_table) + .def_readwrite("nonbonded_table", &mol::alg::StereoChemicalProps::nonbonded_table); def("FillClashingDistances",&fill_clashing_distances_wrapper); def("FillStereoChemicalParams",&fill_stereochemical_params_wrapper); def("IsStandardResidue",&mol::alg::IsStandardResidue); 
def("PrintGlobalRDMap",&mol::alg::PrintGlobalRDMap); def("PrintResidueRDMap",&mol::alg::PrintResidueRDMap); + def("CleanlDDTReferences", &clean_lddt_references_wrapper); + def("PreparelDDTGlobalRDMap", + &prepare_lddt_global_rdmap_wrapper, + (arg("reference_list"), arg("cutoffs"), arg("sequence_separation"), arg("radius"))); + def("CheckStructure", + &mol::alg::CheckStructure, + (arg("ent"), arg("bond_table"), arg("angle_table"), arg("nonbonded_table"), + arg("bond_tolerance"), arg("angle_tolerance"))); + def("GetlDDTPerResidueStats", + &get_lddt_per_residue_stats_wrapper, + (arg("model"), arg("distance_list"), arg("structural_checks"), arg("label"))); + def("PrintlDDTPerResidueStats", + &print_lddt_per_residue_stats_wrapper, + (arg("scores"), arg("structural_checks"), arg("cutoff_list_length"))); + class_<mol::alg::PDBize>("PDBize", init<int>(arg("min_polymer_size")=10)) diff --git a/modules/mol/alg/src/CMakeLists.txt b/modules/mol/alg/src/CMakeLists.txt index d473247e656e17639bc903ce900cfafb42cea83c..42c571d79444add71b47a8da9916a106a2e1caf1 100644 --- a/modules/mol/alg/src/CMakeLists.txt +++ b/modules/mol/alg/src/CMakeLists.txt @@ -20,6 +20,9 @@ set(OST_MOL_ALG_HEADERS similarity_matrix.hh accessibility.hh sec_struct.hh + nonstandard.hh + molck.hh + find_membrane.hh ) set(OST_MOL_ALG_SOURCES @@ -43,6 +46,9 @@ set(OST_MOL_ALG_SOURCES similarity_matrix.cc accessibility.cc sec_struct.cc + nonstandard.cc + molck.cc + find_membrane.cc ) set(MOL_ALG_DEPS ost_mol ost_seq) @@ -58,7 +64,7 @@ if (ENABLE_IMG) entity_to_density.cc ) - set(MOL_ALG_DEPS ${MOL_ALG_DEPS} ost_img ost_img_alg ost_seq_alg) + set(MOL_ALG_DEPS ${MOL_ALG_DEPS} ost_img ost_img_alg ost_seq_alg ost_conop) endif() executable(NAME lddt SOURCES lddt.cc diff --git a/modules/mol/alg/src/accessibility.cc b/modules/mol/alg/src/accessibility.cc index a1becdd7f83c3de0b939e22788d6bf4bb3f22926..be95bfeb03421916779f12af120dab8af8841faa 100644 --- a/modules/mol/alg/src/accessibility.cc +++ 
b/modules/mol/alg/src/accessibility.cc @@ -24,14 +24,15 @@ struct Cube { }; struct CubeGrid { - CubeGrid(Real cel, int x_cubes, int y_cubes, int z_cubes, + CubeGrid(Real cel, int x_c, int y_c, int z_c, Real x_min, Real y_min, Real z_min): cube_edge_length(cel), - x_cubes(x_cubes), - y_cubes(y_cubes), - z_cubes(z_cubes), - x_min(x_min), - y_min(y_min), - z_min(z_min) { + x_min(x_min), + y_min(y_min), + z_min(z_min) { + + x_cubes = std::max(1, x_c); + y_cubes = std::max(1, y_c); + z_cubes = std::max(1, z_c); int num_cubes = x_cubes * y_cubes * z_cubes; cubes = new Cube*[num_cubes]; // assign NULL to each cube @@ -49,11 +50,18 @@ struct CubeGrid { delete [] cubes; } + int GetCubeIdx(Real x, Real y, Real z) { + int x_cube = std::min(static_cast<int>((x - x_min) / cube_edge_length), + x_cubes - 1); + int y_cube = std::min(static_cast<int>((y - y_min) / cube_edge_length), + y_cubes - 1); + int z_cube = std::min(static_cast<int>((z - z_min) / cube_edge_length), + z_cubes - 1); + return x_cube * y_cubes * z_cubes + y_cube * z_cubes + z_cube; + } + void AddIndex(Real x, Real y, Real z, int idx) { - int x_cube = (x - x_min) / cube_edge_length; - int y_cube = (y - y_min) / cube_edge_length; - int z_cube = (z - z_min) / cube_edge_length; - int cube_idx = x_cube * y_cubes * z_cubes + y_cube * z_cubes + z_cube; + int cube_idx = this->GetCubeIdx(x, y, z); if(cubes[cube_idx] == NULL) cubes[cube_idx] = new Cube; cubes[cube_idx]->AddIndex(idx); } @@ -203,6 +211,11 @@ Real GetAtomAccessibilityNACCESS(Real x_pos, Real y_pos, Real z_pos, Real a = x_pos - x[i]; Real b = y_pos - y[i]; Real c = a*a + b*b; + + if(c == Real(0.0)) { + return 0.0; + } + dx[i] = a; dy[i] = b; dsqr[i] = c; diff --git a/modules/mol/alg/src/filter_clashes.cc b/modules/mol/alg/src/filter_clashes.cc index 690a10aff11a238a45663186ac6b98c3ace29db5..e0d2bee05a80d0959b3528776d501151f2b388c0 100644 --- a/modules/mol/alg/src/filter_clashes.cc +++ b/modules/mol/alg/src/filter_clashes.cc @@ -118,7 +118,7 @@ 
std::pair<Real,Real> ClashingDistances::GetClashingDistance(const String& ele1,c std::map <String,std::pair<Real,Real> >::const_iterator find_ci= min_distance_.find(key); if (find_ci == min_distance_.end()) { std::stringstream serr; - serr << "Entry for distance " << stkey << " not found in the parameter table"; + serr << "Entry for distance " << key << " not found in the parameter table"; throw Error(serr.str()); } return find_ci->second; @@ -201,7 +201,7 @@ bool StereoChemicalParams::IsEmpty() const return false; } -StereoChemicalParams FillStereoChemicalParams(const String& header, std::vector<String>& stereo_chemical_props_file) +StereoChemicalParams FillStereoChemicalParams(const String& header, std::vector<String>& stereo_chemical_props_file, bool check) { StereoChemicalParams table; bool found=false; @@ -220,7 +220,7 @@ StereoChemicalParams FillStereoChemicalParams(const String& header, std::vector< if (second_line_str_vec.size()!=4) { std::cout << "The number of elements in one of the lines is wrong" << std::endl; return StereoChemicalParams(); - } + } StringRef item = second_line_str_vec[0]; String res = second_line_str_vec[1].str(); std::pair<bool,float> parse_value = second_line_str_vec[2].to_float(); @@ -231,13 +231,13 @@ StereoChemicalParams FillStereoChemicalParams(const String& header, std::vector< } else { std::cout << "One of the values in the third column is not a number" << std::endl; return StereoChemicalParams(); - }; + } if (parse_stddev.first==true) { stddev=static_cast<Real>(parse_stddev.second); } else { std::cout << "One of the values in the fourth column is not a number" << std::endl; return StereoChemicalParams(); - }; + } std::vector<StringRef> split_item = item.split('-'); String rearranged_item; if (split_item.size() == 2) { @@ -264,7 +264,7 @@ StereoChemicalParams FillStereoChemicalParams(const String& header, std::vector< } else { std::cout << "One of the strings describing the parameter has the wrong format" << std::endl; return 
StereoChemicalParams(); - } + } table.SetParam(rearranged_item,res,value,stddev); } line_iter++; @@ -276,11 +276,18 @@ StereoChemicalParams FillStereoChemicalParams(const String& header, std::vector< if (found==false) { std::cout << "Could not find the relevant section in the stereo-chemical parameter file" << std::endl; return StereoChemicalParams(); - }; + }; + if (check) { + if (table.IsEmpty()) { + std::stringstream serr; + serr << "Error reading the " << header << " section of the stereo-chemical parameter file."; + throw ost::Error(serr.str()); + } + } return table; }; -ClashingDistances FillClashingDistances(std::vector<String>& stereo_chemical_props_file) +ClashingDistances FillClashingDistances(std::vector<String>& stereo_chemical_props_file, bool check) { ClashingDistances table; bool found=false; @@ -299,7 +306,7 @@ ClashingDistances FillClashingDistances(std::vector<String>& stereo_chemical_pro if (second_line_str_vec.size()!=3) { std::cout << "The number of elements in one of the lines is wrong" << std::endl; return ClashingDistances(); - } + } String item = second_line_str_vec[0].str(); std::pair<bool,float> parse_value = second_line_str_vec[1].to_float(); @@ -310,7 +317,7 @@ ClashingDistances FillClashingDistances(std::vector<String>& stereo_chemical_pro } else { std::cout << "One of the distance values is not a number" << std::endl; return ClashingDistances(); - }; + } if (parse_stddev.first==true) { stddev=static_cast<Real>(parse_stddev.second); } else { @@ -322,7 +329,7 @@ ClashingDistances FillClashingDistances(std::vector<String>& stereo_chemical_pro if (itemsr.size() != 3) { std::cout << "One of the strings describing the interacting atoms has the wrong format" << std::endl; return ClashingDistances(); - } + } String ele1=eles[0].str(); String ele2=eles[1].str(); if (ele2 < ele1) { @@ -333,14 +340,21 @@ ClashingDistances FillClashingDistances(std::vector<String>& stereo_chemical_pro } line_iter++; } - } + } } line_iter++; } if (found==false) { 
std::cout << "Could not find the relevant section in the stereo-chemical parameter file" << std::endl; return ClashingDistances(); - } + } + if (check) { + if (table.IsEmpty()) { + std::stringstream serr; + serr << "Error reading the Clashing section of the stereo-chemical parameter file."; + throw ost::Error(serr.str()); + } + } return table; } @@ -407,10 +421,15 @@ std::pair<EntityView,StereoChemistryInfo> CheckStereoChemistry(const EntityView& remove_sc=true; if (always_remove_bb==true) { remove_bb=true; - } - String name=atom.GetName(); - if (name=="CA" || name=="N" || name=="O" || name=="C") { - remove_bb=true; + } else { + // we need to check both atom names since the order is random! + // -> for angles and clashes this is not needed + String name1 = atom.GetName(); + String name2 = other_atom.GetName(); + if (name1=="CA" || name1=="N" || name1=="O" || name1=="C" || + name2=="CA" || name2=="N" || name2=="O" || name2=="C") { + remove_bb=true; + } } } else { LOG_VERBOSE("BOND:" << " " << res.GetChain() << " " << res.GetName() << " " << res.GetNumber() << " " << bond_str << " " << min_length << " " << max_length << " " << blength << " " << zscore << " " << "PASS") @@ -615,7 +634,7 @@ std::pair<EntityView,ClashingInfo> FilterClashes(const EntityView& ent, const Cl if (remove_bb) { LOG_VERBOSE("ACTION: removing whole residue " << res); - res.SetBoolProp("steric_clash",true); + res.SetBoolProp("steric_clash_backbone", true); continue; } if (remove_sc) { @@ -628,7 +647,7 @@ std::pair<EntityView,ClashingInfo> FilterClashes(const EntityView& ent, const Cl filtered.AddAtom(atom); } } - res.SetBoolProp("steric_clash",true); + res.SetBoolProp("steric_clash_sidechain", true); continue; } filtered.AddResidue(res, ViewAddFlag::INCLUDE_ATOMS); diff --git a/modules/mol/alg/src/filter_clashes.hh b/modules/mol/alg/src/filter_clashes.hh index f53f123dc09922937382a5bb67502b0add7603c2..fe6d12c75a7303ec637e194d341831c1200ac4b0 100644 --- a/modules/mol/alg/src/filter_clashes.hh +++ 
b/modules/mol/alg/src/filter_clashes.hh @@ -244,13 +244,13 @@ private: /// \brief Fills a list of reference clashing distances from the content of a parameter file /// /// Requires a list of strings holding the contents of a parameter file, one line per string -ClashingDistances DLLEXPORT_OST_MOL_ALG FillClashingDistances(std::vector<String>& stereo_chemical_props_file); +ClashingDistances DLLEXPORT_OST_MOL_ALG FillClashingDistances(std::vector<String>& stereo_chemical_props_file, bool check=false); /// \brief Fills a list of stereo-chemical statistics from the content of a parameter file /// /// Requires a list of strings holding the contents of a parameter file, one line per string /// The header can be 'Bonds' to read bond statistics or 'Angles' to read angle statistics -StereoChemicalParams DLLEXPORT_OST_MOL_ALG FillStereoChemicalParams(const String& header, std::vector<String>& stereo_chemical_props_file); +StereoChemicalParams DLLEXPORT_OST_MOL_ALG FillStereoChemicalParams(const String& header, std::vector<String>& stereo_chemical_props_file, bool check=false); /// \brief Filters a structure based on detected clashes between non bonded atoms. Entity version /// diff --git a/modules/mol/alg/src/find_membrane.cc b/modules/mol/alg/src/find_membrane.cc new file mode 100644 index 0000000000000000000000000000000000000000..f48fb1149815f65e162e2f39a2c961dff494a0a1 --- /dev/null +++ b/modules/mol/alg/src/find_membrane.cc @@ -0,0 +1,1018 @@ +#include <ost/mol/alg/find_membrane.hh> +#include <ost/mol/alg/accessibility.hh> +#include <ost/geom/vecmat3_op.hh> +#include <ost/message.hh> + +#include <limits> +#include <exception> +#include <list> +#include <cmath> +#include <Eigen/Core> +#include <Eigen/Eigenvalues> + +namespace{ + +// Copyright notice of the Levenberg Marquardt minimizer we use... + +// Copyright (c) 2007, 2008, 2009 libmv authors. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +// +// A simple implementation of levenberg marquardt. +// +// [1] K. Madsen, H. Nielsen, O. Tingleoff. Methods for Non-linear Least +// Squares Problems. +// http://www2.imm.dtu.dk/pubdb/views/edoc_download.php/3215/pdf/imm3215.pdf +// +// TODO(keir): Cite the Lourakis' dogleg paper. 
+ +template<typename Function, + typename Jacobian, + typename Solver = Eigen::JacobiSVD< + Eigen::Matrix<typename Function::FMatrixType::RealScalar, + Function::XMatrixType::RowsAtCompileTime, + Function::XMatrixType::RowsAtCompileTime> > > +class LevenbergMarquardt { + public: + typedef typename Function::XMatrixType::RealScalar Scalar; + typedef typename Function::FMatrixType FVec; + typedef typename Function::XMatrixType Parameters; + typedef Eigen::Matrix<typename Function::FMatrixType::RealScalar, + Function::FMatrixType::RowsAtCompileTime, + Function::XMatrixType::RowsAtCompileTime> JMatrixType; + typedef Eigen::Matrix<typename JMatrixType::RealScalar, + JMatrixType::ColsAtCompileTime, + JMatrixType::ColsAtCompileTime> AMatrixType; + + // TODO(keir): Some of these knobs can be derived from each other and + // removed, instead of requiring the user to set them. + enum Status { + RUNNING, + GRADIENT_TOO_SMALL, // eps > max(J'*f(x)) + RELATIVE_STEP_SIZE_TOO_SMALL, // eps > ||dx|| / ||x|| + ERROR_TOO_SMALL, // eps > ||f(x)|| + HIT_MAX_ITERATIONS, + }; + + LevenbergMarquardt(const Function &f) + : f_(f), df_(f) {} + + struct SolverParameters { + SolverParameters() + : gradient_threshold(1e-20), + relative_step_threshold(1e-20), + error_threshold(1e-16), + initial_scale_factor(1e-3), + max_iterations(200) {} + Scalar gradient_threshold; // eps > max(J'*f(x)) + Scalar relative_step_threshold; // eps > ||dx|| / ||x|| + Scalar error_threshold; // eps > ||f(x)|| + Scalar initial_scale_factor; // Initial u for solving normal equations. + int max_iterations; // Maximum number of solver iterations. 
+ }; + + struct Results { + Scalar error_magnitude; // ||f(x)|| + Scalar gradient_magnitude; // ||J'f(x)|| + int iterations; + Status status; + }; + + Status Update(const Parameters &x, const SolverParameters ¶ms, + JMatrixType *J, AMatrixType *A, FVec *error, Parameters *g) { + *J = df_(x); + *A = (*J).transpose() * (*J); + *error = -f_(x); + *g = (*J).transpose() * *error; + if (g->array().abs().maxCoeff() < params.gradient_threshold) { + return GRADIENT_TOO_SMALL; + } else if (error->norm() < params.error_threshold) { + return ERROR_TOO_SMALL; + } + return RUNNING; + } + + Results minimize(Parameters *x_and_min) { + SolverParameters params; + return minimize(params, x_and_min); + } + + Results minimize(const SolverParameters ¶ms, Parameters *x_and_min) { + Parameters &x = *x_and_min; + JMatrixType J; + AMatrixType A; + FVec error; + Parameters g; + + Results results; + results.status = Update(x, params, &J, &A, &error, &g); + + Scalar u = Scalar(params.initial_scale_factor*A.diagonal().maxCoeff()); + Scalar v = 2; + + Parameters dx, x_new; + int i; + for (i = 0; results.status == RUNNING && i < params.max_iterations; ++i) { +// LOG(INFO) << "iteration: " << i; +// LOG(INFO) << "||f(x)||: " << f_(x).norm(); +// LOG(INFO) << "max(g): " << g.array().abs().maxCoeff(); +// LOG(INFO) << "u: " << u; +// LOG(INFO) << "v: " << v; + AMatrixType A_augmented = A + u*AMatrixType::Identity(J.cols(), J.cols()); + Solver solver(A_augmented, Eigen::ComputeThinU | Eigen::ComputeThinV); + dx = solver.solve(g); + if (dx.norm() <= params.relative_step_threshold * x.norm()) { + results.status = RELATIVE_STEP_SIZE_TOO_SMALL; + break; + } + + x_new = x + dx; + // Rho is the ratio of the actual reduction in error to the reduction + // in error that would be obtained if the problem was linear. + // See [1] for details. + Scalar rho((error.squaredNorm() - f_(x_new).squaredNorm()) + / dx.dot(u*dx + g)); + if (rho > 0) { + // Accept the Gauss-Newton step because the linear model fits well. 
+ x = x_new; + results.status = Update(x, params, &J, &A, &error, &g); + Scalar tmp = Scalar(2*rho-1); + u = u*std::max(Scalar(1/3.), 1 - (tmp*tmp*tmp)); + v = 2; + continue; + } + + // Reject the update because either the normal equations failed to solve + // or the local linear model was not good (rho < 0). Instead, increase + // to move closer to gradient descent. + u *= v; + v *= 2; + } + if (results.status == RUNNING) { + results.status = HIT_MAX_ITERATIONS; + } + results.error_magnitude = error.norm(); + results.gradient_magnitude = g.norm(); + results.iterations = i; + return results; + } + + private: + const Function &f_; + Jacobian df_; +}; + +geom::Mat3 RotationAroundAxis(geom::Vec3 axis, Real angle) { + + Real aa, ab, ac, ba, bb, bc, ca, cb, cc, one_m_cos, cos_ang, sin_ang; + + cos_ang = std::cos(angle); + sin_ang = std::sin(angle); + one_m_cos = 1-cos_ang; + + aa = cos_ang+axis[0]*axis[0]*one_m_cos; + ab = axis[0]*axis[1]*one_m_cos-axis[2]*sin_ang; + ac = axis[0]*axis[2]*one_m_cos+axis[1]*sin_ang; + + ba = axis[1]*axis[0]*one_m_cos+axis[2]*sin_ang; + bb = cos_ang+axis[1]*axis[1]*one_m_cos; + bc = axis[1]*axis[2]*one_m_cos-axis[0]*sin_ang; + + ca = axis[2]*axis[0]*one_m_cos-axis[1]*sin_ang; + cb = axis[2]*axis[1]*one_m_cos+axis[0]*sin_ang; + cc = cos_ang+axis[2]*axis[2]*one_m_cos; + + geom::Mat3 result(aa, ab, ac, ba, bb, bc, ca, cb, cc); + return result; +} + +geom::Vec3 RotateAroundAxis(geom::Vec3 point, geom::Vec3 axis, Real angle) { + + geom::Mat3 rot = RotationAroundAxis(axis, angle); + geom::Vec3 result = rot*point; + return result; +} + + +// Levenberg Marquardt specific objects for the membrane finding algorithm + +struct EnergyF { + EnergyF(const std::vector<geom::Vec3>& p, const std::vector<Real>& t_e, + Real l, Real o, const geom::Vec3& ax, const geom::Vec3& tilt_ax): + positions(p), transfer_energies(t_e), axis(ax), + tilt_axis(tilt_ax), lambda(l), offset(o) { } + + typedef Eigen::Matrix<Real, 4, 1> XMatrixType; + typedef Eigen::Matrix<Real, 
1, 1> FMatrixType; + + Eigen::Matrix<Real,1,1> operator()(const Eigen::Matrix<Real, 4, 1>& x) const { + + FMatrixType result; + result(0,0) = 0.0; + geom::Vec3 tilted_axis = axis; + + tilted_axis = RotateAroundAxis(tilted_axis, tilt_axis, x(0, 0)); + tilted_axis = RotateAroundAxis(tilted_axis, axis, x(1, 0)); + + Real pos_on_axis; + Real distance_to_center; + Real half_width = Real(0.5) * x(2, 0); + Real exponent; + Real one_over_lambda = Real(1.0) / lambda; + + int n = transfer_energies.size(); + + for(int i = 0; i < n; ++i) { + pos_on_axis = geom::Dot(tilted_axis, positions[i]); + distance_to_center = std::abs(x(3, 0) - pos_on_axis); + exponent = (distance_to_center - half_width) * one_over_lambda; + result(0, 0) += (1.0 / (1.0 + std::exp(exponent))) * transfer_energies[i]; + } + + result(0, 0) += offset; + + return result; + } + + std::vector<geom::Vec3> positions; + std::vector<Real> transfer_energies; + geom::Vec3 axis; + geom::Vec3 tilt_axis; + Real lambda; + + // define an offset parameter... + // The levenberg-marquardt algorithm is designed to minimize an error + // function, aims to converge towards zero. 
The offset parameter gets added + // at the end of the energy calculation to get a positive result => + // forces algorithm to minimize + Real offset; +}; + + +struct EnergyDF { + + EnergyDF(const EnergyF& f): function(f),d_tilt(0.02),d_angle(0.02), + d_width(0.4),d_pos(0.4) { } + + Eigen::Matrix<Real,1,4> operator()(const Eigen::Matrix<Real, 4, 1>& x) const { + + Eigen::Matrix<Real,1,4> result; + Eigen::Matrix<Real, 4, 1> parameter1 = x; + Eigen::Matrix<Real, 4, 1> parameter2 = x; + + parameter1(0,0)+=d_tilt; + parameter2(0,0)-=d_tilt; + result(0,0) = (function(parameter1)(0, 0) - + function(parameter2)(0, 0)) / (2*d_tilt); + + parameter1=x; + parameter2=x; + parameter1(1,0)+=d_angle; + parameter2(1,0)-=d_angle; + result(0,1) = (function(parameter1)(0,0) - + function(parameter2)(0,0)) / (2*d_angle); + + parameter1=x; + parameter2=x; + parameter1(2,0)+=d_width; + parameter2(2,0)-=d_width; + result(0,2) = (function(parameter1)(0,0) - + function(parameter2)(0,0)) / (2*d_width); + + parameter1=x; + parameter2=x; + parameter1(3,0)+=d_pos; + parameter2(3,0)-=d_pos; + result(0,3) = (function(parameter1)(0,0) - + function(parameter2)(0,0)) / (2*d_pos); + return result; + } + + EnergyF function; + Real d_tilt; + Real d_angle; + Real d_width; + Real d_pos; +}; + + +void GetRanges(const std::vector<geom::Vec3>& atom_positions, + Real& min_x, Real& max_x, Real& min_y, Real& max_y, + Real& min_z, Real& max_z) { + + min_x = std::numeric_limits<Real>::max(); + max_x = -min_x; + + min_y = std::numeric_limits<Real>::max(); + max_y = -min_y; + + min_z = std::numeric_limits<Real>::max(); + max_z = -min_z; + + for(uint i = 0; i < atom_positions.size(); ++i) { + const geom::Vec3& pos = atom_positions[i]; + min_x = std::min(min_x, pos[0]); + max_x = std::max(max_x, pos[0]); + min_y = std::min(min_y, pos[1]); + max_y = std::max(max_y, pos[1]); + min_z = std::min(min_z, pos[2]); + max_z = std::max(max_z, pos[2]); + } +} + + +void FloodLevel(char* data, int x_start, int y_start, + int 
x_extent, int y_extent, + int orig_value, int dest_value) { + + //http://lodev.org/cgtutor/floodfill.html + if(orig_value != data[x_start*y_extent + y_start]) { + return; + } + + std::vector<std::pair<int,int> > queue; + queue.push_back(std::make_pair(x_start, y_start)); + + int y1,y,x; + bool spanLeft, spanRight; + std::pair<int,int> actual_position; + + while(!queue.empty()){ + + actual_position = queue.back(); + queue.pop_back(); + x = actual_position.first; + y = actual_position.second; + + y1 = y; + + while(y1 >= 0 && data[x*y_extent + y1] == orig_value) { + --y1; + } + + y1++; + spanLeft = spanRight = 0; + + while(y1 < y_extent && + data[x*y_extent + y1] == orig_value ) { + + data[x*y_extent + y1] = dest_value; + + if(!spanLeft && x > 0 && + data[(x-1)*y_extent + y1] == orig_value) { + queue.push_back(std::make_pair(x-1,y1)); + spanLeft = 1; + } + else if(spanLeft && x > 0 && + data[(x-1)*y_extent + y1] != orig_value) { + spanLeft = 0; + } + + if(!spanRight && x < x_extent - 1 && + data[(x+1)*y_extent + y1] == orig_value) { + queue.push_back(std::make_pair(x+1,y1)); + spanRight = 1; + } + else if(spanRight && x < x_extent - 1 && + data[(x+1)*y_extent + y1] != orig_value) { + spanRight = 0; + } + ++y1; + } + } +} + + +void GetExposedAtoms(const std::vector<geom::Vec3>& atom_positions, + const std::vector<Real>& transfer_energies, + std::vector<geom::Vec3>& exposed_atom_positions, + std::vector<Real>& exposed_transfer_energies) { + + // sum of approx. vdw radius of the present heavy atoms (1.8) + // plus 1.4 for water. 
+ Real radius = 3.2; + Real one_over_radius = Real(1.0) / radius; + + // lets setup a grid in which we place the atoms + Real min_x, max_x, min_y, max_y, min_z, max_z; + GetRanges(atom_positions, min_x, max_x, min_y, max_y, min_z, max_z); + + // we guarantee that the thing is properly solvated in the x-y plane and add + // some space around this is also necessary to avoid overflow checks in + // different places + min_x -= Real(2.1) * radius; + min_y -= Real(2.1) * radius; + min_z -= Real(2.1) * radius; + max_x += Real(2.1) * radius; + max_y += Real(2.1) * radius; + max_z += Real(2.1) * radius; + + int num_xbins = std::ceil((max_x - min_x) * one_over_radius); + int num_ybins = std::ceil((max_y - min_y) * one_over_radius); + int num_zbins = std::ceil((max_z - min_z) * one_over_radius); + int num_bins = num_xbins * num_ybins * num_zbins; + char* grid = new char[num_bins]; + memset(grid, 0, num_bins); + + for(uint i = 0; i < atom_positions.size(); ++i) { + + const geom::Vec3& pos = atom_positions[i]; + int x_bin = (pos[0] - min_x) * one_over_radius; + int y_bin = (pos[1] - min_y) * one_over_radius; + int z_bin = (pos[2] - min_z) * one_over_radius; + + // we're really crude here and simply set all 27 cubes with central + // cube defined by x_bin, y_bin and z_bin to one + for(int z = z_bin - 1; z <= z_bin + 1; ++z) { + for(int x = x_bin - 1; x <= x_bin + 1; ++x) { + for(int y = y_bin - 1; y <= y_bin + 1; ++y) { + grid[z*num_xbins*num_ybins + x*num_ybins + y] = 1; + } + } + } + } + + // lets call flood fill for every layer along the z-axis from every + // corner in the x-y plane. 
+ for(int z = 0; z < num_zbins; ++z) { + char* level = &grid[z*num_xbins*num_ybins]; + FloodLevel(level, 0, 0, num_xbins, num_ybins, 0, 2); + FloodLevel(level, 0, num_ybins - 1, num_xbins, num_ybins, 0, 2); + FloodLevel(level, num_xbins - 1, 0, num_xbins, num_ybins, 0, 2); + FloodLevel(level, num_xbins - 1, num_ybins - 1, num_xbins, num_ybins, 0, 2); + } + + // every cube in every layer that has currently value 1 that has a city-block + // distance below 3 to any cube with value 2 is considered to be in contact + // with the outer surface... lets set them to a value of three + for(int z = 0; z < num_zbins; ++z) { + char* level = &grid[z*num_xbins*num_ybins]; + for(int x = 0; x < num_xbins; ++x) { + for(int y = 0; y < num_ybins; ++y) { + if(level[x*num_ybins + y] == 1) { + int x_from = std::max(0, x - 3); + int x_to = std::min(num_xbins-1, x + 3); + int y_from = std::max(0, y - 3); + int y_to = std::min(num_ybins-1, y + 3); + bool exposed = false; + for(int i = x_from; i <= x_to && !exposed; ++i) { + for(int j = y_from; j <= y_to; ++j) { + if(level[i*num_ybins + j] == 2) { + level[x*num_ybins + y] = 3; + exposed = true; + break; + } + } + } + } + } + } + } + + // all positions that lie in a cube with value 3 are considered to be exposed... 
+ exposed_atom_positions.clear(); + exposed_transfer_energies.clear(); + for(uint i = 0; i < atom_positions.size(); ++i) { + const geom::Vec3& pos = atom_positions[i]; + int x_bin = (pos[0] - min_x) * one_over_radius; + int y_bin = (pos[1] - min_y) * one_over_radius; + int z_bin = (pos[2] - min_z) * one_over_radius; + if(grid[z_bin*num_xbins*num_ybins + x_bin*num_ybins + y_bin] == 3) { + exposed_atom_positions.push_back(pos); + exposed_transfer_energies.push_back(transfer_energies[i]); + } + } + + // cleanup + delete [] grid; +} + + +void ScanAxis(const std::vector<geom::Vec3>& atom_positions, + const std::vector<Real>& transfer_energies, + const geom::Vec3& axis, + Real& best_width, Real& best_center, Real& best_energy) { + + int n_pos = atom_positions.size(); + + geom::Vec3 normalized_axis = geom::Normalize(axis); + std::vector<Real> pos_on_axis(n_pos); + + for(int i = 0; i < n_pos; ++i) { + pos_on_axis[i] = geom::Dot(atom_positions[i], normalized_axis); + } + + Real min_pos = pos_on_axis[0]; + Real max_pos = min_pos; + + for(int i = 1; i < n_pos; ++i) { + min_pos = std::min(min_pos, pos_on_axis[i]); + max_pos = std::max(max_pos, pos_on_axis[i]); + } + + min_pos = std::floor(min_pos); + max_pos = std::ceil(max_pos); + + int full_width = int(max_pos - min_pos) + 1; + + //energies representing the energy profile along the axis + std::vector<Real> mapped_energies(full_width, 0.0); + + for(int i = 0; i < n_pos; ++i) { + mapped_energies[int(pos_on_axis[i] - min_pos)] += transfer_energies[i]; + } + + best_width = 0; + best_center = 0; + best_energy = std::numeric_limits<Real>::max(); + + for(int window_width = 10; window_width <= 40; ++window_width) { + + if(window_width > full_width) { + break; + } + + Real energy=0.0; + for(int i = 0; i < window_width; ++i) { + energy += mapped_energies[i]; + } + + if(energy < best_energy) { + best_width = window_width; + best_center = min_pos + Real(0.5) * window_width + Real(0.5); + best_energy = energy; + } + + for(int pos = 1; 
pos < full_width - window_width; ++pos) { + energy -= mapped_energies[pos-1]; + energy += mapped_energies[pos+window_width-1]; + if(energy < best_energy){ + best_width = window_width; + best_center = min_pos + pos + Real(0.5) * window_width + Real(0.5); + best_energy = energy; + } + } + } +} + + +struct LMInput { + ost::mol::alg::FindMemParam mem_param; + geom::Transform initial_transform; + std::vector<geom::Vec3> exposed_atom_positions; + std::vector<Real> exposed_transfer_energies; +}; + + +void SampleZ(const std::vector<geom::Vec3>& atom_pos, + const std::vector<Real>& transfer_energies, + const geom::Transform& initial_transform, + int n_solutions, std::list<LMInput>& top_solutions) { + + + std::vector<geom::Vec3> transformed_atom_pos(atom_pos.size()); + for(uint at_idx = 0; at_idx < atom_pos.size(); ++at_idx) { + transformed_atom_pos[at_idx] = initial_transform.Apply(atom_pos[at_idx]); + } + + std::vector<geom::Vec3> exposed_atom_positions; + std::vector<Real> exposed_transfer_energies; + GetExposedAtoms(transformed_atom_pos, transfer_energies, + exposed_atom_positions, exposed_transfer_energies); + + std::vector<Real> tilt_angles; + std::vector<Real> rotation_angles; + for(int tilt_deg = 0; tilt_deg <= 45; tilt_deg += 5) { + if(tilt_deg == 0) { + tilt_angles.push_back(0.0); + rotation_angles.push_back(0.0); + } + else { + Real tilt_angle = Real(tilt_deg) / Real(180.) * Real(M_PI); + for(int angle_deg = 0; angle_deg < 360; angle_deg += 5) { + tilt_angles.push_back(tilt_angle); + rotation_angles.push_back(Real(angle_deg) / Real(180.) 
* Real(M_PI)); + } + } + } + + geom::Vec3 normalized_axis(0.0,0.0,1.0); + geom::Vec3 tilt_axis(1.0,0.0,0.0); + + for(uint i = 0; i < tilt_angles.size(); ++i) { + + Real tilt_angle = tilt_angles[i]; + Real rotation_angle = rotation_angles[i]; + + geom::Vec3 tilted_axis = RotateAroundAxis(normalized_axis, tilt_axis, + tilt_angle); + geom::Vec3 scan_axis = RotateAroundAxis(tilted_axis, normalized_axis, + rotation_angle); + + Real actual_width, actual_center, actual_energy; + ScanAxis(exposed_atom_positions, exposed_transfer_energies, scan_axis, + actual_width, actual_center, actual_energy); + + if(static_cast<int>(top_solutions.size()) >= n_solutions && + actual_energy > top_solutions.back().mem_param.energy) { + continue; + } + + LMInput lm_input; + lm_input.mem_param.axis = normalized_axis; + lm_input.mem_param.tilt_axis = tilt_axis; + lm_input.mem_param.tilt = tilt_angle; + lm_input.mem_param.angle = rotation_angle; + lm_input.mem_param.width = actual_width; + lm_input.mem_param.pos = actual_center; + lm_input.mem_param.energy = actual_energy; + lm_input.initial_transform = initial_transform; + lm_input.exposed_atom_positions = exposed_atom_positions; + lm_input.exposed_transfer_energies = exposed_transfer_energies; + + if(top_solutions.empty()) { + top_solutions.push_back(lm_input); + } + else { + bool added = false; + for(std::list<LMInput>::iterator sol_it = top_solutions.begin(); + sol_it != top_solutions.end(); ++sol_it) { + if(sol_it->mem_param.energy > lm_input.mem_param.energy) { + top_solutions.insert(sol_it, lm_input); + added = true; + break; + } + } + + if(!added) { + top_solutions.push_back(lm_input); + } + + while(static_cast<int>(top_solutions.size()) > n_solutions) { + top_solutions.pop_back(); + } + } + } +} + + +ost::mol::alg::FindMemParam GetFinalSolution(const std::list<LMInput>& top_solutions, + Real lambda) { + + Real best_energy = std::numeric_limits<Real>::max(); + std::list<LMInput>::const_iterator best_sol_it = top_solutions.begin(); + 
Eigen::Matrix<Real, 4, 1> lm_parameters; + Eigen::Matrix<Real, 4, 1> best_lm_parameters; + LevenbergMarquardt<EnergyF, EnergyDF>::Results lm_result; + + for(std::list<LMInput>::const_iterator sol_it = top_solutions.begin(); + sol_it != top_solutions.end(); ++sol_it) { + + Real offset = std::max(Real(20000.), std::abs(sol_it->mem_param.energy * 2)); + + EnergyF en_f(sol_it->exposed_atom_positions, + sol_it->exposed_transfer_energies, + lambda, offset, + sol_it->mem_param.axis, + sol_it->mem_param.tilt_axis); + + lm_parameters(0,0) = sol_it->mem_param.tilt; + lm_parameters(1,0) = sol_it->mem_param.angle; + lm_parameters(2,0) = sol_it->mem_param.width; + lm_parameters(3,0) = sol_it->mem_param.pos; + + LevenbergMarquardt<EnergyF,EnergyDF> lm(en_f); + lm_result = lm.minimize(&lm_parameters); + + Real minimized_energy = en_f(lm_parameters)(0, 0) - en_f.offset; + + if(minimized_energy < best_energy) { + best_energy = minimized_energy; + best_sol_it = sol_it; + best_lm_parameters = lm_parameters; + } + } + + ost::mol::alg::FindMemParam mem_param = best_sol_it->mem_param; + mem_param.energy = best_energy; + mem_param.tilt = best_lm_parameters(0,0); + mem_param.angle = best_lm_parameters(1,0); + mem_param.width = best_lm_parameters(2,0); + mem_param.pos = best_lm_parameters(3,0); + + // the solution is still relative to the initial transform that has + // been applied when calling the SampleZ funtion! 
+ geom::Transform t = best_sol_it->initial_transform; + mem_param.tilt_axis = t.ApplyInverse(mem_param.tilt_axis); + mem_param.axis = t.ApplyInverse(mem_param.axis); + + return mem_param; +} + + +ost::mol::EntityHandle CreateMembraneRepresentation( + const std::vector<geom::Vec3>& atom_positions, + const ost::mol::alg::FindMemParam& param, + Real membrane_margin = 15, + Real delta = 2.0) { + + // let's first construct two planes defining the membrane + geom::Vec3 membrane_axis = param.GetMembraneAxis(); + geom::Vec3 one = param.pos * membrane_axis + + membrane_axis * param.width / 2; + geom::Vec3 two = param.pos * membrane_axis - + membrane_axis * param.width / 2; + geom::Plane plane_one = geom::Plane(one, membrane_axis); + geom::Plane plane_two = geom::Plane(two, membrane_axis); + + // let's find all positions that are somehow close to those planes + geom::Vec3List close_pos; + geom::Vec3List close_pos_one; + geom::Vec3List close_pos_two; + + for(uint i = 0; i < atom_positions.size(); ++i) { + + Real d1 = geom::Distance(plane_one, atom_positions[i]); + Real d2 = geom::Distance(plane_two, atom_positions[i]); + + if(d1 < Real(3.)) { + close_pos_one.push_back(atom_positions[i]); + } + + if(d2 < Real(3.)) { + close_pos_two.push_back(atom_positions[i]); + } + + if(d1 < Real(3.) 
|| d2 < Real(3.)) { + close_pos.push_back(atom_positions[i]); + } + } + + // the geometric center of the close pos vector in combination with the + // membrane axis define the central "line" of the disks that will represent + // the membrane + geom::Vec3 center_pos = close_pos.GetCenter(); + geom::Line3 center_line = geom::Line3(center_pos, center_pos + membrane_axis); + + // the final radius of the "disks" is based on the maximal distance of any + // position in close_pos to the center_line plus the membrane_margin + + Real max_d_to_center_line = 0; + for(uint i = 0; i < close_pos.size(); ++i) { + Real d = geom::Distance(center_line, close_pos[i]); + max_d_to_center_line = std::max(max_d_to_center_line, d); + } + + Real disk_radius = max_d_to_center_line + membrane_margin; + int num_sampling_points = (Real(2.) * disk_radius) / delta; + + // reassign the top and bottom positions, that have been only arbitrary + // points on the membrane planes + one = geom::IntersectionPoint(center_line, plane_one); + two = geom::IntersectionPoint(center_line, plane_two); + + // find a pair of perpendicular vectors, that are on the plane + geom::Vec3 arbitrary_vec(1.0, 0.0, 0.0); + if(geom::Angle(membrane_axis, arbitrary_vec) < 0.1) { + // parallel is not cool in this case + arbitrary_vec = geom::Vec3(0.0, 1.0, 0.0); + } + geom::Vec3 plane_x = geom::Normalize(geom::Cross(membrane_axis, arbitrary_vec)); + geom::Vec3 plane_y = geom::Normalize(geom::Cross(membrane_axis, plane_x)); + + // final representing positions come in here + std::vector<geom::Vec3> final_pos; + + // do plane one + geom::Vec3 origin = one - delta * num_sampling_points * 0.5 * plane_x - + delta * num_sampling_points * 0.5 * plane_y; + + for(int i = 0; i < num_sampling_points; ++i) { + for(int j = 0; j < num_sampling_points; ++j) { + geom::Vec3 pos = origin + i*delta*plane_x + j*delta*plane_y; + if(geom::Distance(pos, one) < disk_radius) { + bool far_far_away = true; + // this is slow... 
+ for(uint k = 0; k < close_pos_one.size(); ++k) { + if(geom::Length2(pos - close_pos_one[k]) < Real(16.)) { + far_far_away = false; + break; + } + } + if(far_far_away) { + final_pos.push_back(pos); + } + } + } + } + + // do plane two + origin = two - delta * num_sampling_points * 0.5 * plane_x - + delta * num_sampling_points * 0.5 * plane_y; + + for(int i = 0; i < num_sampling_points; ++i) { + for(int j = 0; j < num_sampling_points; ++j) { + geom::Vec3 pos = origin + i*delta*plane_x + j*delta*plane_y; + if(geom::Distance(pos, two) < disk_radius) { + bool far_far_away = true; + // this is slow... + for(uint k = 0; k < close_pos_two.size(); ++k) { + if(geom::Length2(pos - close_pos_two[k]) < Real(16.)) { + far_far_away = false; + break; + } + } + if(far_far_away) { + final_pos.push_back(pos); + } + } + } + } + + // create hacky entity that contains membrane representing positions and + // return + ost::mol::EntityHandle membrane_ent = ost::mol::CreateEntity(); + ost::mol::XCSEditor ed = membrane_ent.EditXCS(); + + ost::mol::ChainHandle chain = ed.InsertChain("M"); + ost::mol::ResidueHandle res = ed.AppendResidue(chain, "MEM"); + String atom_names = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + uint atom_name_idx = 0; + uint atom_name_secondary_idx = 0; + + for(uint i = 0; i < final_pos.size(); ++i) { + + if(atom_name_secondary_idx == atom_names.size()) { + ++atom_name_idx; + atom_name_secondary_idx = 0; + } + if(atom_name_idx == atom_names.size()) { + res = ed.AppendResidue(chain, "MEM"); + atom_name_idx = 0; + atom_name_secondary_idx = 0; + } + + String atom_name = "--"; + atom_name[0] = atom_names[atom_name_idx]; + atom_name[1] = atom_names[atom_name_secondary_idx]; + + ed.InsertAtom(res, atom_name, final_pos[i]); + ++atom_name_secondary_idx; + } + + return membrane_ent; +} + + +} // anon namespace + + +namespace ost{ namespace mol{ namespace alg{ + +geom::Vec3 FindMemParam::GetMembraneAxis() const { + + geom::Vec3 result = RotateAroundAxis(axis,tilt_axis,tilt); + result = 
RotateAroundAxis(result,axis,angle); + return result; +} + + +FindMemParam FindMembrane(ost::mol::EntityView& ent, + bool assign_membrane_representation, bool fast) { + + ost::mol::EntityView peptide_view = ent.Select("peptide=true and ele!=H"); + Accessibility(peptide_view); + + std::vector<geom::Vec3> atom_pos; + std::vector<Real> transfer_energies; + + atom_pos.reserve(peptide_view.GetAtomCount()); + transfer_energies.reserve(peptide_view.GetAtomCount()); + + ost::mol::AtomViewList atoms = peptide_view.GetAtomList(); + String stupid_string("S_N_O_C"); + + for(ost::mol::AtomViewList::iterator it = atoms.begin(); + it != atoms.end(); ++it) { + + if(!it->HasProp("asaAtom")) { + continue; + } + + String element = it->GetElement(); + if(stupid_string.find(element) == std::string::npos) { + continue; + } + + Real asa = it->GetFloatProp("asaAtom"); + atom_pos.push_back(it->GetPos()); + + if(element == "S") { + transfer_energies.push_back(asa * Real(10.0)); + } + else if(element == "N") { + transfer_energies.push_back(asa * Real(53.0)); + } + else if(element == "O") { + transfer_energies.push_back(asa * Real(57.0)); + } + else if(element == "C") { + // check whether we find a double bond to distinguish between + // hibridization states + bool assigned_energy = false; + ost::mol::BondHandleList bond_list = it->GetBondList(); + for(ost::mol::BondHandleList::iterator bond_it = bond_list.begin(); + bond_it != bond_list.end(); ++bond_it){ + unsigned char bond_order = bond_it->GetBondOrder(); + if(bond_order > '1'){ + transfer_energies.push_back(asa * Real(-19.0)); + assigned_energy = true; + break;; + } + } + if(!assigned_energy) { + transfer_energies.push_back(asa * Real(-22.6)); + } + } + } + + if(atom_pos.size() < 10) { + throw ost::Error("Cannot detect membrane with such a low number " + "of heavy atoms!"); + } + + // we always optimizer along the z-axis. + // We therefore have to transform the positions. 
We use a rotation + // around the z-axis with subsequent rotation around the x-axis for this task + std::vector<geom::Transform> transformations; + int n_euler_angles = 3; + int n_transformations = n_euler_angles * n_euler_angles * n_euler_angles; + std::vector<Real> euler_angles(n_euler_angles); + euler_angles[0] = 0.0; + euler_angles[1] = M_PI/3; + euler_angles[2] = 2*M_PI/3; + + for(int i = 0; i < n_euler_angles; ++i) { + for(int j = 0; j < n_euler_angles; ++j) { + for(int k = 0; k < n_euler_angles; ++k) { + geom::Mat3 rot_matrix = geom::EulerTransformation(euler_angles[i], + euler_angles[j], + euler_angles[k]); + geom::Transform transform; + transform.SetRot(rot_matrix); + transformations.push_back(transform); + } + } + } + + // lets use the generated transforms to search for initial solutions that can + // then be fed into a final minimization... + std::list<LMInput> top_solutions; + int n_initial_solutions = fast ? 1 : 20; + + for(int transformation_idx = 0; transformation_idx < n_transformations; + ++transformation_idx) { + SampleZ(atom_pos, transfer_energies, transformations[transformation_idx], + n_initial_solutions, top_solutions); + } + + // Perform the final minimization and return the best solution. 
+ // please note, that the returned solution is transformed back in order + // to match the initial atom positions + FindMemParam final_solution = GetFinalSolution(top_solutions, 0.9); + + if(assign_membrane_representation) { + final_solution.membrane_representation = CreateMembraneRepresentation( + atom_pos, + final_solution); + } + + return final_solution; +} + + +FindMemParam FindMembrane(ost::mol::EntityHandle& ent, + bool assign_membrane_representation, + bool fast) { + + ost::mol::EntityView ent_view = ent.CreateFullView(); + return FindMembrane(ent_view, assign_membrane_representation, fast); +} + +}}} // ns diff --git a/modules/mol/alg/src/find_membrane.hh b/modules/mol/alg/src/find_membrane.hh new file mode 100644 index 0000000000000000000000000000000000000000..962ea68fc09680304c2da095f78900eff5483225 --- /dev/null +++ b/modules/mol/alg/src/find_membrane.hh @@ -0,0 +1,31 @@ +#include <ost/mol/mol.hh> + +#include <ost/geom/geom.hh> +#include <ost/io/binary_data_source.hh> +#include <ost/io/binary_data_sink.hh> + +namespace ost { namespace mol{ namespace alg{ + +struct FindMemParam{ + FindMemParam() { } + + geom::Vec3 GetMembraneAxis() const; + geom::Vec3 axis; + geom::Vec3 tilt_axis; + Real tilt; + Real angle; + Real width; + Real pos; + Real energy; + ost::mol::EntityHandle membrane_representation; +}; + +FindMemParam FindMembrane(ost::mol::EntityHandle& ent, + bool assign_membrane_representation, + bool fast); + +FindMemParam FindMembrane(ost::mol::EntityView& ent, + bool assign_membrane_representation, + bool fast); + +}}} // ns diff --git a/modules/mol/alg/src/lddt.cc b/modules/mol/alg/src/lddt.cc index 11e328e4deb40c00117dd666337af25a57112fc3..a64f5bb192d5abab6777b750786858d1fd477d16 100644 --- a/modules/mol/alg/src/lddt.cc +++ b/modules/mol/alg/src/lddt.cc @@ -32,6 +32,7 @@ #include <ost/mol/alg/filter_clashes.hh> #include <ost/io/mol/pdb_reader.hh> #include <ost/io/io_exception.hh> +#include <ost/io/stereochemical_params_reader.hh> #include 
<ost/conop/conop.hh> #include <ost/conop/compound_lib.hh> #include <ost/string_ref.hh> @@ -101,30 +102,6 @@ void usage() } -// computes coverage -std::pair<int,int> compute_coverage (const EntityView& v,const GlobalRDMap& glob_dist_list) -{ - int second=0; - int first=0; - if (v.GetResidueList().size()==0) { - if (glob_dist_list.size()==0) { - return std::make_pair(0,-1); - } else { - return std::make_pair(0,glob_dist_list.size()); - } - } - ChainView vchain=v.GetChainList()[0]; - for (GlobalRDMap::const_iterator i=glob_dist_list.begin();i!=glob_dist_list.end();++i) - { - ResNum rnum = (*i).first; - second++; - if (vchain.FindResidue(rnum)) { - first++; - } - } - return std::make_pair(first,second); -} - CompoundLibPtr load_compound_lib(const String& custom_path) { if (custom_path!="") { @@ -174,33 +151,22 @@ CompoundLibPtr load_compound_lib(const String& custom_path) } return CompoundLibPtr(); } -bool is_resnum_in_globalrdmap(const ResNum& resnum, const GlobalRDMap& glob_dist_list) -{ - for (GlobalRDMap::const_iterator i=glob_dist_list.begin(), e=glob_dist_list.end(); i!=e; ++i) { - ResNum rn = i->first; - if (rn==resnum) { - return true; - } - } - return false; -} int main (int argc, char **argv) { // sets some default values for parameters String version = OST_VERSION_STRING; + lDDTSettings settings; + String parameter_file_path; + bool structural_checks = false; + bool ignore_consistency_checks = false; Real bond_tolerance = 12.0; Real angle_tolerance = 12.0; - Real radius=15.0; - int sequence_separation = 0; - + String sel; // creates the required loading profile IOProfile profile; // parses options - String sel; String custom_path; - bool structural_checks=false; - bool consistency_checks=true; po::options_description desc("Options"); desc.add_options() ("calpha,c", "consider only calpha atoms") @@ -256,14 +222,14 @@ int main (int argc, char **argv) structural_checks=true; } if (vm.count("ignore-consistency-checks")) { - consistency_checks=false; + 
ignore_consistency_checks=true; } if (vm.count("tolerant")) { profile.fault_tolerant=true; } - String parameter_filename; + if (vm.count("parameter-file")) { - parameter_filename=vm["parameter-file"].as<String>(); + parameter_file_path=vm["parameter-file"].as<String>(); } else if (structural_checks==true) { std::cout << "Please specify a stereo-chemical parameter file" << std::endl; exit(-1); @@ -293,17 +259,11 @@ int main (int argc, char **argv) angle_tolerance=vm["angle_tolerance"].as<Real>(); } if (vm.count("inclusion_radius")) { - radius=vm["inclusion_radius"].as<Real>(); + settings.radius=vm["inclusion_radius"].as<Real>(); } if (vm.count("sequence_separation")) { - sequence_separation=vm["sequence_separation"].as<int>(); + settings.sequence_separation=vm["sequence_separation"].as<int>(); } - - std::vector<Real> cutoffs; - cutoffs.push_back(0.5); - cutoffs.push_back(1.0); - cutoffs.push_back(2.0); - cutoffs.push_back(4.0); std::vector<EntityView> ref_list; @@ -311,55 +271,45 @@ int main (int argc, char **argv) // if the reference file is a comma-separated list of files, switches to multi- // reference mode GlobalRDMap glob_dist_list; - String ref_file=files.back(); + String ref_file=files.back(); ost::StringRef ref_file_sr(ref_file.c_str(),ref_file.length()); std::vector<StringRef> ref_file_split_sr=ref_file_sr.split(','); - if (ref_file_split_sr.size()==1) { - std::cout << "Multi-reference mode: Off" << std::endl; - String ref_filename = ref_file_split_sr[0].str(); + for (std::vector<StringRef>::const_iterator ref_file_split_sr_it = ref_file_split_sr.begin(); + ref_file_split_sr_it != ref_file_split_sr.end();++ref_file_split_sr_it) { + String ref_filename = ref_file_split_sr_it->str(); EntityHandle ref=load(ref_filename, profile); if (!ref) { exit(-1); - } - EntityView refview=ref.GetChainList()[0].Select("peptide=true"); - ref_list.push_back(refview); - glob_dist_list = CreateDistanceList(refview,radius); - } else { - std::cout << "Multi-reference mode: On" 
<< std::endl; - for (std::vector<StringRef>::const_iterator ref_file_split_sr_it = ref_file_split_sr.begin(); - ref_file_split_sr_it != ref_file_split_sr.end();++ref_file_split_sr_it) { - String ref_filename = ref_file_split_sr_it->str(); - EntityHandle ref=load(ref_filename, profile); - if (!ref) { + } + if (sel != ""){ + std::cout << "Performing \"" << sel << "\" selection on reference " << ref_filename << std::endl; + try { + ref_list.push_back(ref.Select(sel)); + } catch (const ost::mol::QueryError& e) { + std::cerr << "Provided selection argument failed." << std::endl << e.GetFormattedMessage() << std::endl; exit(-1); } - if (! ref_list.empty()) { - if (ref_list[0].GetChainList()[0].GetName()!=ref.GetChainList()[0].GetName()) { - std::cout << "ERROR: First chains in the reference structures have different names" << std::endl; - exit(-1); - } - } - EntityView refview=ref.GetChainList()[0].Select("peptide=true"); + } + else { ref_list.push_back(ref.CreateFullView()); - } - glob_dist_list = CreateDistanceListFromMultipleReferences (ref_list,cutoffs,sequence_separation,radius); - } + } + } + CleanlDDTReferences(ref_list); + if (ref_list.size()==1) { + std::cout << "Multi-reference mode: Off" << std::endl; + } else { + std::cout << "Multi-reference mode: On" << std::endl; + } + glob_dist_list = PreparelDDTGlobalRDMap(ref_list, + settings.cutoffs, + settings.sequence_separation, + settings.radius); files.pop_back(); // prints out parameters used in the lddt calculation std::cout << "Verbosity level: " << verbosity_level << std::endl; + settings.PrintParameters(); if (structural_checks) { - std::cout << "Stereo-chemical and steric clash checks: On " << std::endl; - } else { - std::cout << "Stereo-chemical and steric clash checks: Off " << std::endl; - } - std::cout << "Inclusion Radius: " << radius << std::endl; - - std::cout << "Sequence separation: " << sequence_separation << std::endl; - if (structural_checks) { - std::cout << "Parameter filename: " << 
parameter_filename << std::endl; - std::cout << "Tolerance in stddevs for bonds: " << bond_tolerance << std::endl; - std::cout << "Tolerance in stddevs for angles: " << angle_tolerance << std::endl; LOG_INFO("Log entries format:"); LOG_INFO("BOND INFO FORMAT: Chain Residue ResNum Bond Min Max Observed Z-score Status"); LOG_INFO("ANGLE INFO FORMAT: Chain Residue ResNum Angle Min Max Observed Z-score Status"); @@ -382,183 +332,55 @@ int main (int argc, char **argv) } continue; } - EntityView v=model.GetChainList()[0].Select("peptide=true"); - EntityView outv=model.GetChainList()[0].Select("peptide=true"); - for (std::vector<EntityView>::const_iterator ref_list_it = ref_list.begin(); - ref_list_it != ref_list.end(); ++ref_list_it) { - bool cons_check = ResidueNamesMatch(v,*ref_list_it,consistency_checks); - if (cons_check==false) { - if (consistency_checks==true) { - LOG_ERROR("Residue names in model: " << files[i] << " and in reference structure(s) are inconsistent."); - exit(-1); - } else { - LOG_WARNING("Residue names in model: " << files[i] << " and in reference structure(s) are inconsistent."); - } - } - } + EntityView model_view = model.GetChainList()[0].Select("peptide=true"); boost::filesystem::path pathstring(files[i]); - String filestring=BFPathToString(pathstring); - std::cout << "File: " << files[i] << std::endl; - std::pair<int,int> cov = compute_coverage(v,glob_dist_list); - if (cov.second == -1) { - std::cout << "Coverage: 0 (0 out of 0 residues)" << std::endl; - } else { - std::cout << "Coverage: " << (float(cov.first)/float(cov.second)) << " (" << cov.first << " out of " << cov.second << " residues)" << std::endl; - } + std::cout << "File: " << files[i] << std::endl; if (structural_checks) { - // reads in parameter files - boost::filesystem::path loc(parameter_filename); - boost::filesystem::ifstream infile(loc); - if (!infile) { - std::cout << "Could not find " << parameter_filename << std::endl; - exit(-1); - } - std::vector<String> 
stereo_chemical_props; - String line; - while (std::getline(infile, line)) - { - std::stringstream line_stream(line); - stereo_chemical_props.push_back(line); - } - StereoChemicalParams bond_table = FillStereoChemicalParams("Bond",stereo_chemical_props); - if (bond_table.IsEmpty()) { - std::cout << "Error reading the Bond section of the stereo-chemical parameter file." << std::endl; - exit(-1); - } - StereoChemicalParams angle_table = FillStereoChemicalParams("Angle",stereo_chemical_props); - if (angle_table.IsEmpty()) { - std::cout << "Error reading the Angles section of the stereo-chemical parameter file." << std::endl; - exit(-1); - } - - ClashingDistances nonbonded_table = FillClashingDistances(stereo_chemical_props); - - if (nonbonded_table.IsEmpty()) { - std::cout << "Error reading the Clashing section of the stereo-chemical parameter file." << std::endl; - exit(-1); - } - // performs structural checks and filters the structure - StereoChemistryInfo stereo_chemistry_info; - try { - std::pair<EntityView,StereoChemistryInfo> csc_result = alg::CheckStereoChemistry(v,bond_table,angle_table,bond_tolerance,angle_tolerance); - v = csc_result.first; - stereo_chemistry_info = csc_result.second; - } catch (std::exception& e) { - std::cout << "An error occurred during the structure quality checks, stage 1:" << std::endl; + StereoChemicalProps stereochemical_params; + try { + stereochemical_params = ost::io::ReadStereoChemicalPropsFile(parameter_file_path, true); + } catch (std::exception& e) { std::cout << e.what() << std::endl; exit(-1); } - std::cout << "Average Z-Score for bond lengths: " << std::fixed << std::setprecision(5) << stereo_chemistry_info.GetAvgZscoreBonds() << std::endl; - std::cout << "Bonds outside of tolerance range: " << stereo_chemistry_info.GetBadBondCount() << " out of " << stereo_chemistry_info.GetBondCount() << std::endl; - std::cout << "Bond\tAvg Length\tAvg zscore\tNum Bonds" << std::endl; - std::map<String,BondLengthInfo> avg_bond_length_info 
= stereo_chemistry_info.GetAvgBondLengthInfo(); - for (std::map<String,BondLengthInfo>::const_iterator abli_it=avg_bond_length_info.begin();abli_it!=avg_bond_length_info.end();++abli_it) { - String key = (*abli_it).first; - BondLengthInfo bond_length_info = (*abli_it).second; - std::cout << key << "\t" << std::fixed << std::setprecision(5) << std::left << std::setw(10) << - bond_length_info.GetAvgLength() << "\t" << std::left << std::setw(10) << bond_length_info.GetAvgZscore() << "\t" << bond_length_info.GetCount() << std::endl; - } - std::cout << "Average Z-Score angle widths: " << std::fixed << std::setprecision(5) << stereo_chemistry_info.GetAvgZscoreAngles() << std::endl; - std::cout << "Angles outside of tolerance range: " << stereo_chemistry_info.GetBadAngleCount() << " out of " << stereo_chemistry_info.GetAngleCount() << std::endl; - ClashingInfo clash_info; + try { - std::pair<EntityView,ClashingInfo> fc_result = alg::FilterClashes(v,nonbonded_table); - v = fc_result.first; - clash_info = fc_result.second; - } catch (std::exception& e) { - std::cout << "An error occurred during the structure quality checks, stage 2:" << std::endl; + CheckStructure(model_view, + stereochemical_params.bond_table, + stereochemical_params.angle_table, + stereochemical_params.nonbonded_table, + bond_tolerance, + angle_tolerance); + } catch (std::exception& e) { std::cout << e.what() << std::endl; exit(-1); } - std::cout << clash_info.GetClashCount() << " non-bonded short-range distances shorter than tolerance distance" << std::endl; - std::cout << "Distances shorter than tolerance are on average shorter by: " << std::fixed << std::setprecision(5) << clash_info.GetAverageOffset() << std::endl; - - } - if (cov.first==0) { - std::cout << "Global LDDT score: 0.0" << std::endl; - return 0; - } - - // computes the lddt score - String label="localldt"; - std::pair<int,int> total_ov=alg::LocalDistDiffTest(v, glob_dist_list, cutoffs, sequence_separation, label); - Real lddt = 
static_cast<Real>(total_ov.first)/(static_cast<Real>(total_ov.second) ? static_cast<Real>(total_ov.second) : 1); - std::cout << "Global LDDT score: " << std::setprecision(4) << lddt << std::endl; - std::cout << "(" << std::fixed << total_ov.first << " conserved distances out of " << total_ov.second - << " checked, over " << cutoffs.size() << " thresholds)" << std::endl; - - // prints the residue-by-residue statistics - if (structural_checks) { - std::cout << "Local LDDT Scores:" << std::endl; - std::cout << "(A 'Yes' in the 'Quality Problems' column stands for problems" << std::endl; - std::cout << "in the side-chain of a residue, while a 'Yes+' for problems" << std::endl; - std::cout << "in the backbone)" << std::endl; - } else { - std::cout << "Local LDDT Scores:" << std::endl; } - if (structural_checks) { - std::cout << "Chain\tResName\tResNum\tAsses.\tQ.Prob.\tScore\t(Conserved/Total, over " << cutoffs.size() << " thresholds)" << std::endl; - } else { - std::cout << "Chain\tResName\tResNum\tAsses.\tScore\t(Conserved/Total, over " << cutoffs.size() << " thresholds)" << std::endl; - } - for (ChainViewList::const_iterator ci = outv.GetChainList().begin(), - ce = outv.GetChainList().end(); ci != ce; ++ci) { - for (ResidueViewList::const_iterator rit = ci->GetResidueList().begin(), - re = ci->GetResidueList().end(); rit != re; ++rit) { - - ResidueView ritv=*rit; - ResNum rnum = ritv.GetNumber(); - bool assessed = false; - String assessed_string="No"; - String quality_problems_string="No"; - Real lddt_local = -1; - String lddt_local_string="-"; - int conserved_dist = -1; - int total_dist = -1; - String dist_string = "-"; - if (is_resnum_in_globalrdmap(rnum,glob_dist_list)) { - assessed = true; - assessed_string="Yes"; - } - if (ritv.HasProp("stereo_chemical_violation_sidechain") || - ritv.HasProp("steric_clash_sidechain")) { - quality_problems_string="Yes"; - } - if (ritv.HasProp("stereo_chemical_violation_backbone") || - ritv.HasProp("steric_clash_backbone")) { - 
quality_problems_string="Yes+"; - } - if (assessed==true) { - if (ritv.HasProp(label)) { - lddt_local=ritv.GetFloatProp(label); - std::stringstream stkeylddt; - stkeylddt << std::fixed << std::setprecision(4) << lddt_local; - lddt_local_string=stkeylddt.str(); - conserved_dist=ritv.GetIntProp(label+"_conserved"); - total_dist=ritv.GetIntProp(label+"_total"); - std::stringstream stkeydist; - stkeydist << "("<< conserved_dist << "/" << total_dist << ")"; - dist_string=stkeydist.str(); - } else { - lddt_local = 0; - lddt_local_string="0.0000"; - conserved_dist = 0; - total_dist = 0; - dist_string="(0/0)"; - } - } - if (structural_checks) { - std::cout << ritv.GetChain() << "\t" << ritv.GetName() << "\t" << ritv.GetNumber() << '\t' << assessed_string << '\t' << quality_problems_string << '\t' << lddt_local_string << "\t" << dist_string << std::endl; + // Check consistency + for (std::vector<EntityView>::const_iterator ref_list_it = ref_list.begin(); + ref_list_it != ref_list.end(); ++ref_list_it) { + bool cons_check = ResidueNamesMatch(model_view,*ref_list_it, ignore_consistency_checks); + if (cons_check==false) { + if (ignore_consistency_checks==false) { + throw std::runtime_error("Residue names in model and in reference structure(s) are inconsistent."); } else { - std::cout << ritv.GetChain() << "\t" << ritv.GetName() << "\t" << ritv.GetNumber() << '\t' << assessed_string << '\t' << lddt_local_string << "\t" << dist_string << std::endl; + LOG_WARNING("Residue names in model and in reference structure(s) are inconsistent."); } } } - std::cout << std::endl; + + // computes the lddt score + LocalDistDiffTest(model_view, ref_list, glob_dist_list, settings); + + // prints the residue-by-residue statistics + std::vector<lDDTLocalScore> local_scores; + EntityView outview = model.GetChainList()[0].Select("peptide=true"); + local_scores = GetlDDTPerResidueStats(outview, glob_dist_list, structural_checks, settings.label); + PrintlDDTPerResidueStats(local_scores, 
structural_checks, settings.cutoffs.size()); } return 0; } diff --git a/modules/mol/alg/src/local_dist_diff_test.cc b/modules/mol/alg/src/local_dist_diff_test.cc index aad66e7a639e265256fe5511a628203f2e57df53..de6ea31e6febb60862594f372891710e604539cd 100644 --- a/modules/mol/alg/src/local_dist_diff_test.cc +++ b/modules/mol/alg/src/local_dist_diff_test.cc @@ -1,12 +1,30 @@ +#include <iomanip> +#include <sstream> #include <ost/log.hh> #include <ost/mol/mol.hh> +#include <ost/platform.hh> #include "local_dist_diff_test.hh" #include <boost/concept_check.hpp> +#include <boost/filesystem/convenience.hpp> +#include <ost/mol/alg/consistency_checks.hh> +#include <ost/io/stereochemical_params_reader.hh> namespace ost { namespace mol { namespace alg { namespace { +// helper_function +String vector_to_string(std::vector<Real> vec) { + std::ostringstream str; + for (unsigned int i = 0; i < vec.size(); ++i) { + str << vec[i]; + if (i+1 != vec.size()) { + str << ", "; + } + } + return str.str(); +} + // helper function bool within_tolerance(Real mdl_dist, const std::pair<Real,Real>& values, Real tol) { @@ -307,6 +325,42 @@ void merge_distance_lists(GlobalRDMap& ref_dist_map, const GlobalRDMap& new_dist } +// Computes coverage +std::pair<int,int> ComputeCoverage(const EntityView& v,const GlobalRDMap& glob_dist_list) +{ + int second=0; + int first=0; + if (v.GetResidueList().size()==0) { + if (glob_dist_list.size()==0) { + return std::make_pair(0,-1); + } else { + return std::make_pair(0,glob_dist_list.size()); + } + } + ChainView vchain=v.GetChainList()[0]; + for (GlobalRDMap::const_iterator i=glob_dist_list.begin();i!=glob_dist_list.end();++i) + { + ResNum rnum = (*i).first; + second++; + if (vchain.FindResidue(rnum)) { + first++; + } + } + return std::make_pair(first,second); +} + +bool IsResnumInGlobalRDMap(const ResNum& resnum, const GlobalRDMap& glob_dist_list) +{ + for (GlobalRDMap::const_iterator i=glob_dist_list.begin(), e=glob_dist_list.end(); i!=e; ++i) { + ResNum rn = 
i->first; + if (rn==resnum) { + return true; + } + } + return false; +} + + // helper function bool IsStandardResidue(String rn) { @@ -335,7 +389,214 @@ bool IsStandardResidue(String rn) return true; } return false; -} +} + +StereoChemicalProps::StereoChemicalProps(): + is_valid(false) {} + +StereoChemicalProps::StereoChemicalProps( + ost::mol::alg::StereoChemicalParams& init_bond_table, + ost::mol::alg::StereoChemicalParams& init_angle_table, + ost::mol::alg::ClashingDistances& init_nonbonded_table): + is_valid(true), + bond_table(init_bond_table), + angle_table(init_angle_table), + nonbonded_table(init_nonbonded_table) {} + +lDDTSettings::lDDTSettings(): radius(15.0), + sequence_separation(0), + label("locallddt") { + cutoffs.push_back(0.5); + cutoffs.push_back(1.0); + cutoffs.push_back(2.0); + cutoffs.push_back(4.0); + } + +lDDTSettings::lDDTSettings(Real init_radius, + int init_sequence_separation, + std::vector<Real>& init_cutoffs, + String init_label): + radius(init_radius), + sequence_separation(init_sequence_separation), + cutoffs(init_cutoffs), + label(init_label) {} + +std::string lDDTSettings::ToString() { + std::ostringstream rep; + rep << "Inclusion Radius: " << radius << "\n"; + rep << "Sequence separation: " << sequence_separation << "\n"; + rep << "Cutoffs: " << vector_to_string(cutoffs) << "\n"; + rep << "Residue properties label: " << label << "\n"; + + return rep.str(); +} + +void lDDTSettings::PrintParameters() { + std::cout << ToString(); +} + +// Default constructor is neccessary for boost exposure +lDDTLocalScore::lDDTLocalScore(): cname(""), + rname(""), + rnum(-1), + is_assessed(""), + quality_problems(""), + local_lddt(-1.0), + conserved_dist(-1), + total_dist(-1) {} + +lDDTLocalScore::lDDTLocalScore(String init_cname, + String init_rname, + int init_rnum, + String init_is_assessed, + String init_quality_problems, + Real init_local_lddt, + int init_conserved_dist, + int init_total_dist): + cname(init_cname), + rname(init_rname), + 
rnum(init_rnum), + is_assessed(init_is_assessed), + quality_problems(init_quality_problems), + local_lddt(init_local_lddt), + conserved_dist(init_conserved_dist), + total_dist(init_total_dist) {} + +String lDDTLocalScore::ToString(bool structural_checks) const { + std::stringstream stkeylddt; + std::stringstream outstr; + stkeylddt << "(" << conserved_dist << "/" << total_dist << ")"; + String dist_string = stkeylddt.str(); + if (structural_checks) { + outstr << cname << "\t" << rname << "\t" << rnum << '\t' << is_assessed << '\t' << quality_problems << '\t' << local_lddt << "\t" << dist_string; + } else { + outstr << cname << "\t" << rname << "\t" << rnum << '\t' << is_assessed << '\t' << local_lddt << "\t" << dist_string; + } + return outstr.str(); +} + +String lDDTLocalScore::Repr() const { + std::stringstream outstr; + outstr << "<lDDTLocalScore " << cname << "." << rname << "." << rnum << ">"; + return outstr.str(); +} + +String lDDTLocalScore::GetHeader(bool structural_checks, int cutoffs_length) { + std::stringstream outstr; + if (structural_checks) { + outstr << "Chain\tResName\tResNum\tAsses.\tQ.Prob.\tScore\t(Conserved/Total, over " << cutoffs_length << " thresholds)"; + } else { + outstr << "Chain\tResName\tResNum\tAsses.\tScore\t(Conserved/Total, over " << cutoffs_length << " thresholds)"; + } + return outstr.str(); +} + +lDDTScorer::lDDTScorer(std::vector<EntityView>& init_references, + ost::mol::EntityView& init_model, + lDDTSettings& init_settings): + settings(init_settings), + model_view(init_model), + references_view(init_references) + { + _score_calculated = false; + _score_valid = false; + _has_local_scores = false; + _num_cons_con = -1; + _num_tot_con = -1; + _global_score = -1.0; + CleanlDDTReferences(references_view); + _PrepareGlobalRDMap(); + } + +Real lDDTScorer::GetGlobalScore(){ + if (!_score_calculated) { + _ComputelDDT(); + } + return _global_score; +} + +int lDDTScorer::GetNumConservedContacts(){ + if (!_score_calculated) { + 
_ComputelDDT(); + } + return _num_cons_con; +} + +int lDDTScorer::GetNumTotalContacts(){ + if (!_score_calculated) { + _ComputelDDT(); + } + return _num_tot_con; +} + +std::vector<lDDTLocalScore> lDDTScorer::GetLocalScores(){ + if (!_has_local_scores) { + _GetLocallDDT(); + } + return _local_scores; +} + +void lDDTScorer::PrintPerResidueStats(){ + if (!_has_local_scores) { + _GetLocallDDT(); + } + PrintlDDTPerResidueStats(_local_scores, + false, + settings.cutoffs.size()); +} + +std::vector<EntityView> lDDTScorer::GetReferences(){ + return references_view; +} + +void lDDTScorer::_PrepareGlobalRDMap(){ + glob_dist_list = PreparelDDTGlobalRDMap(references_view, + settings.cutoffs, + settings.sequence_separation, + settings.radius); +} + +bool lDDTScorer::IsValid(){ + return _score_valid; +} + +void lDDTScorer::_ComputelDDT(){ + std::pair<int,int> cov = ComputeCoverage(model_view,glob_dist_list); + if (cov.second == -1) { + LOG_INFO("Coverage: 0 (0 out of 0 residues)"); + } else { + std::stringstream sout; + sout << "Coverage: " << (float(cov.first)/float(cov.second)) << " (" << cov.first << " out of " << cov.second << " residues)"; + LOG_INFO(sout.str()); + } + + if (cov.first == 0) { + _num_tot_con = 0; + _num_cons_con = 0; + _global_score = 0.0; + _score_calculated = true; + _score_valid = false; + } + + std::pair<int,int> total_ov=alg::LocalDistDiffTest(model_view, glob_dist_list, settings.cutoffs, settings.sequence_separation, settings.label); + Real lddt = static_cast<Real>(total_ov.first)/(static_cast<Real>(total_ov.second) ? static_cast<Real>(total_ov.second) : 1); + _num_tot_con = total_ov.second ? 
total_ov.second : 1; + _num_cons_con = total_ov.first; + _global_score = lddt; + _score_calculated = true; + _score_valid = true; +} + +void lDDTScorer::_GetLocallDDT(){ + if (!_score_calculated){ + _ComputelDDT(); + } + _local_scores = GetlDDTPerResidueStats(model_view, + glob_dist_list, + false, // do not print structural checks + settings.label); + _has_local_scores = true; +} GlobalRDMap CreateDistanceList(const EntityView& ref,Real max_dist) @@ -473,6 +734,30 @@ Real LocalDistDiffTest(const EntityView& mdl, const EntityView& target, Real cut return static_cast<Real>(total_ov.first)/(static_cast<Real>(total_ov.second) ? static_cast<Real>(total_ov.second) : 1); } +Real LocalDistDiffTest(const EntityView& v, + std::vector<EntityView>& ref_list, + const GlobalRDMap& glob_dist_list, + lDDTSettings& settings) { + + std::pair<int,int> cov = ComputeCoverage(v,glob_dist_list); + if (cov.second == -1) { + std::cout << "Coverage: 0 (0 out of 0 residues)" << std::endl; + } else { + std::cout << "Coverage: " << (float(cov.first)/float(cov.second)) << " (" << cov.first << " out of " << cov.second << " residues)" << std::endl; + } + + if (cov.first==0) { + std::cout << "Global LDDT score: 0.0" << std::endl; + return 0.0; + } + + std::pair<int,int> total_ov=alg::LocalDistDiffTest(v, glob_dist_list, settings.cutoffs, settings.sequence_separation, settings.label); + Real lddt = static_cast<Real>(total_ov.first)/(static_cast<Real>(total_ov.second) ? 
static_cast<Real>(total_ov.second) : 1); + std::cout << "Global LDDT score: " << std::setprecision(4) << lddt << std::endl; + std::cout << "(" << std::fixed << total_ov.first << " conserved distances out of " << total_ov.second + << " checked, over " << settings.cutoffs.size() << " thresholds)" << std::endl; + return lddt; +} Real LocalDistDiffTest(const ost::seq::AlignmentHandle& aln, Real cutoff, Real max_dist, int ref_index, int mdl_index) @@ -536,6 +821,163 @@ Real LDDTHA(EntityView& v, const GlobalRDMap& global_dist_list, int sequence_sep return static_cast<Real>(total_ov.first)/(static_cast<Real>(total_ov.second) ? static_cast<Real>(total_ov.second) : 1); } + +void CleanlDDTReferences(std::vector<EntityView>& ref_list){ + for (unsigned int i=0;i<ref_list.size();i++) { + if (ref_list[0].GetChainList()[0].GetName()!=ref_list[i].GetChainList()[0].GetName()) { + std::cout << "ERROR: First chains in the reference structures have different names" << std::endl; + exit(-1); + } + ref_list[i] = ref_list[i].GetChainList()[0].Select("peptide=true"); + } +} + +GlobalRDMap PreparelDDTGlobalRDMap(const std::vector<EntityView>& ref_list, + std::vector<Real>& cutoff_list, + int sequence_separation, + Real max_dist){ + GlobalRDMap glob_dist_list; + if (ref_list.size()==1) { + glob_dist_list = CreateDistanceList(ref_list[0], max_dist); + } else { + glob_dist_list = CreateDistanceListFromMultipleReferences(ref_list, + cutoff_list, + sequence_separation, + max_dist); + } + + return glob_dist_list; +} + +void CheckStructure(EntityView& ent, + StereoChemicalParams& bond_table, + StereoChemicalParams& angle_table, + ClashingDistances& nonbonded_table, + Real bond_tolerance, + Real angle_tolerance){ + // performs structural checks and filters the structure + StereoChemistryInfo stereo_chemistry_info; + try { + std::pair<EntityView,StereoChemistryInfo> csc_result = alg::CheckStereoChemistry(ent,bond_table,angle_table,bond_tolerance,angle_tolerance); + ent = csc_result.first; + 
stereo_chemistry_info = csc_result.second; + } catch (std::exception& e) { + std::cout << "An error occurred during the structure quality checks, stage 1:" << std::endl; + std::cout << e.what() << std::endl; + exit(-1); + } + std::cout << "Average Z-Score for bond lengths: " << std::fixed << std::setprecision(5) << stereo_chemistry_info.GetAvgZscoreBonds() << std::endl; + std::cout << "Bonds outside of tolerance range: " << stereo_chemistry_info.GetBadBondCount() << " out of " << stereo_chemistry_info.GetBondCount() << std::endl; + std::cout << "Bond\tAvg Length\tAvg zscore\tNum Bonds" << std::endl; + std::map<String,BondLengthInfo> avg_bond_length_info = stereo_chemistry_info.GetAvgBondLengthInfo(); + for (std::map<String,BondLengthInfo>::const_iterator abli_it=avg_bond_length_info.begin();abli_it!=avg_bond_length_info.end();++abli_it) { + String key = (*abli_it).first; + BondLengthInfo bond_length_info = (*abli_it).second; + std::cout << key << "\t" << std::fixed << std::setprecision(5) << std::left << std::setw(10) << + bond_length_info.GetAvgLength() << "\t" << std::left << std::setw(10) << bond_length_info.GetAvgZscore() << "\t" << bond_length_info.GetCount() << std::endl; + } + std::cout << "Average Z-Score angle widths: " << std::fixed << std::setprecision(5) << stereo_chemistry_info.GetAvgZscoreAngles() << std::endl; + std::cout << "Angles outside of tolerance range: " << stereo_chemistry_info.GetBadAngleCount() << " out of " << stereo_chemistry_info.GetAngleCount() << std::endl; + ClashingInfo clash_info; + try { + std::pair<EntityView,ClashingInfo> fc_result = alg::FilterClashes(ent,nonbonded_table); + ent = fc_result.first; + clash_info = fc_result.second; + } catch (std::exception& e) { + std::stringstream serr; + serr << "An error occurred during the structure quality checks, stage 2: " << e.what(); + throw ost::Error(serr.str()); + } + std::cout << clash_info.GetClashCount() << " non-bonded short-range distances shorter than tolerance distance" << 
std::endl; + std::cout << "Distances shorter than tolerance are on average shorter by: " << std::fixed << std::setprecision(5) << clash_info.GetAverageOffset() << std::endl; +} + +std::vector<lDDTLocalScore> GetlDDTPerResidueStats(EntityView& model, + GlobalRDMap& glob_dist_list, + bool structural_checks, + String label){ + std::vector<lDDTLocalScore> scores; + EntityView outv = model; + for (ChainViewList::const_iterator ci = outv.GetChainList().begin(), + ce = outv.GetChainList().end(); ci != ce; ++ci) { + for (ResidueViewList::const_iterator rit = ci->GetResidueList().begin(), + re = ci->GetResidueList().end(); rit != re; ++rit) { + + ResidueView ritv=*rit; + ResNum rnum = ritv.GetNumber(); + bool assessed = false; + String assessed_string="No"; + String quality_problems_string; + if (structural_checks) { + quality_problems_string="No"; + } else { + quality_problems_string="NA"; + } + Real lddt_local = -1; + String lddt_local_string="-"; + int conserved_dist = -1; + int total_dist = -1; + if (IsResnumInGlobalRDMap(rnum,glob_dist_list)) { + assessed = true; + assessed_string="Yes"; + } + if (ritv.HasProp("stereo_chemical_violation_sidechain") || + ritv.HasProp("steric_clash_sidechain")) { + quality_problems_string="Yes"; + } + if (ritv.HasProp("stereo_chemical_violation_backbone") || + ritv.HasProp("steric_clash_backbone")) { + quality_problems_string="Yes+"; + } + + if (assessed==true) { + if (ritv.HasProp(label)) { + lddt_local=ritv.GetFloatProp(label); + std::stringstream stkeylddt; + stkeylddt << std::fixed << std::setprecision(4) << lddt_local; + lddt_local_string=stkeylddt.str(); + conserved_dist=ritv.GetIntProp(label+"_conserved"); + total_dist=ritv.GetIntProp(label+"_total"); + } else { + //std::cout << label << std::endl; + lddt_local = 0; + lddt_local_string="0.0000"; + conserved_dist = 0; + total_dist = 0; + } + } + // std::tuple<String, String, int, String, String, Real, int, int> + lDDTLocalScore row(ritv.GetChain().GetName(), + ritv.GetName(), + 
ritv.GetNumber().GetNum(), + assessed_string, + quality_problems_string, + lddt_local, + conserved_dist, + total_dist); + scores.push_back(row); + } + } + + return scores; +} + +void PrintlDDTPerResidueStats(std::vector<lDDTLocalScore>& scores, bool structural_checks, int cutoffs_length){ + if (structural_checks) { + std::cout << "Local LDDT Scores:" << std::endl; + std::cout << "(A 'Yes' in the 'Quality Problems' column stands for problems" << std::endl; + std::cout << "in the side-chain of a residue, while a 'Yes+' for problems" << std::endl; + std::cout << "in the backbone)" << std::endl; + } else { + std::cout << "Local LDDT Scores:" << std::endl; + } + std::cout << lDDTLocalScore::GetHeader(structural_checks, cutoffs_length) << std::endl; + for (std::vector<lDDTLocalScore>::const_iterator sit = scores.begin(); sit != scores.end(); ++sit) { + std::cout << sit->ToString(structural_checks) << std::endl; + } + std::cout << std::endl; +} + // debugging code /* Real OldStyleLDDTHA(EntityView& v, const GlobalRDMap& global_dist_list) diff --git a/modules/mol/alg/src/local_dist_diff_test.hh b/modules/mol/alg/src/local_dist_diff_test.hh index 9dd2f230617ac1b0b795ec6a7e467a80c1485655..916fe4facd4f71dbc0aa672b824e4d6c84500c00 100644 --- a/modules/mol/alg/src/local_dist_diff_test.hh +++ b/modules/mol/alg/src/local_dist_diff_test.hh @@ -20,10 +20,105 @@ #define OST_MOL_ALG_LOCAL_DIST_TEST_HH #include <ost/mol/alg/module_config.hh> +#include <ost/mol/entity_handle.hh> #include <ost/seq/alignment_handle.hh> #include <ost/mol/alg/distance_test_common.hh> +#include <ost/mol/alg/filter_clashes.hh> namespace ost { namespace mol { namespace alg { + +struct StereoChemicalProps +{ + bool is_valid; + ost::mol::alg::StereoChemicalParams bond_table; + ost::mol::alg::StereoChemicalParams angle_table; + ost::mol::alg::ClashingDistances nonbonded_table; + + StereoChemicalProps(); + StereoChemicalProps(ost::mol::alg::StereoChemicalParams& init_bond_table, + 
ost::mol::alg::StereoChemicalParams& init_angle_table, + ost::mol::alg::ClashingDistances& init_nonbonded_table); +}; + +struct lDDTSettings { + Real radius; + int sequence_separation; + std::vector<Real> cutoffs; + String label; + + lDDTSettings(); + lDDTSettings(Real init_radius, + int init_sequence_separation, + std::vector<Real>& init_cutoffs, + String init_label); + void PrintParameters(); + std::string ToString(); +}; + +struct lDDTLocalScore { + String cname; + String rname; + int rnum; + String is_assessed; + String quality_problems; + Real local_lddt; + int conserved_dist; + int total_dist; + + lDDTLocalScore(); + + lDDTLocalScore(String init_cname, + String init_rname, + int init_rnum, + String init_is_assessed, + String init_quality_problems, + Real init_local_lddt, + int init_conserved_dist, + int init_total_dist); + + String ToString(bool structural_checks) const; + String Repr() const; + + static String GetHeader(bool structural_checks, int cutoffs_length); +}; + +class lDDTScorer +{ + public: + lDDTSettings settings; + EntityView model_view; + std::vector<EntityView> references_view; + GlobalRDMap glob_dist_list; + + lDDTScorer(std::vector<EntityView>& init_references, + ost::mol::EntityView& init_model, + lDDTSettings& init_settings); + Real GetGlobalScore(); + std::vector<lDDTLocalScore> GetLocalScores(); + int GetNumConservedContacts(); // number of conserved distances in the model + int GetNumTotalContacts(); // the number of total distances in the reference structure + std::vector<EntityView> GetReferences(); + void PrintPerResidueStats(); + bool IsValid(); + + private: + bool _score_calculated; + bool _score_valid; + bool _has_local_scores; + // number of conserved distances in the model and + // the number of total distances in the reference structure + int _num_cons_con; + int _num_tot_con; + Real _global_score; + std::vector<lDDTLocalScore> _local_scores; + void _ComputelDDT(); + void _GetLocallDDT(); + void _PrepareGlobalRDMap(); +}; + 
+std::pair<int,int> DLLEXPORT_OST_MOL_ALG ComputeCoverage(const EntityView& v,const GlobalRDMap& glob_dist_list); + +bool DLLEXPORT_OST_MOL_ALG IsResnumInGlobalRDMap(const ResNum& resnum, const GlobalRDMap& glob_dist_list); /// \brief Calculates number of distances conserved in a model, given a list of distances to check and a model /// @@ -73,6 +168,12 @@ Real DLLEXPORT_OST_MOL_ALG LocalDistDiffTest(const EntityView& mdl, Real cutoff, Real max_dist, const String& local_ldt_property_string=""); +/// \brief Wrapper around LocalDistDiffTest +Real DLLEXPORT_OST_MOL_ALG LocalDistDiffTest(const EntityView& v, + std::vector<EntityView>& ref_list, + const GlobalRDMap& glob_dist_list, + lDDTSettings& settings); + /// \brief Calculates the Local Distance Difference Test score for a given model starting from an alignment between a reference structure and the model. /// /// Calculates the Local Distance Difference Test score given an alignment between a model and a taget structure. @@ -135,8 +236,31 @@ void DLLEXPORT_OST_MOL_ALG PrintResidueRDMap(const ResidueRDMap& res_dist_list); // circular dependencies bool DLLEXPORT_OST_MOL_ALG IsStandardResidue(String rn); -}}} +void DLLEXPORT_OST_MOL_ALG CleanlDDTReferences(std::vector<EntityView>& ref_list); -#endif +// Prepare GlobalRDMap from reference list +GlobalRDMap DLLEXPORT_OST_MOL_ALG PreparelDDTGlobalRDMap( + const std::vector<EntityView>& ref_list, + std::vector<Real>& cutoff_list, + int sequence_separation, + Real max_dist); +void DLLEXPORT_OST_MOL_ALG CheckStructure(EntityView& ent, + StereoChemicalParams& bond_table, + StereoChemicalParams& angle_table, + ClashingDistances& nonbonded_table, + Real bond_tolerance, + Real angle_tolerance); +std::vector<lDDTLocalScore> DLLEXPORT_OST_MOL_ALG GetlDDTPerResidueStats(EntityView& model, + GlobalRDMap& glob_dist_list, + bool structural_checks, + String label); + +void DLLEXPORT_OST_MOL_ALG PrintlDDTPerResidueStats(std::vector<lDDTLocalScore>& scores, + bool structural_checks, + 
int cutoffs_length); + +}}} + +#endif diff --git a/modules/mol/alg/src/molck.cc b/modules/mol/alg/src/molck.cc new file mode 100644 index 0000000000000000000000000000000000000000..ef5585d897e74bfd079066ac37ffc57ac774cf7e --- /dev/null +++ b/modules/mol/alg/src/molck.cc @@ -0,0 +1,169 @@ +#include <ost/mol/xcs_editor.hh> +#include <ost/mol/alg/nonstandard.hh> +#include <ost/conop/model_check.hh> +#include <ost/conop/amino_acids.hh> +#include <ost/conop/rule_based.hh> +#include <ost/mol/alg/molck.hh> + +using namespace ost::conop; +using namespace ost::mol; + + +void ost::mol::alg::MapNonStandardResidues(EntityHandle& ent, CompoundLibPtr lib) { + // TODO: Maybe it is possible to make it in-place operation + EntityHandle new_ent=CreateEntity(); + new_ent.SetName(ent.GetName()); + ChainHandleList chains=ent.GetChainList(); + XCSEditor new_edi=new_ent.EditXCS(); + for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { + ChainHandle new_chain = new_edi.InsertChain(c->GetName()); + ResidueHandleList residues = c->GetResidueList(); + for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { + AminoAcid aa = ResidueToAminoAcid(*r); + if (aa!=XXX) { + ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); + AtomHandleList atoms = r->GetAtomList(); + for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { + new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); + } + continue; + } else { + CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); + if (!compound || !compound->IsPeptideLinking() || compound->GetChemClass()==ChemClass::D_PEPTIDE_LINKING || + OneLetterCodeToAminoAcid(compound->GetOneLetterCode())==XXX) { + ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); + AtomHandleList atoms = r->GetAtomList(); + for (AtomHandleList::const_iterator 
a=atoms.begin();a!=atoms.end();++a) { + new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); + } + continue; + } + ResidueHandle dest_res = new_edi.AppendResidue(new_chain,OneLetterCodeToResidueName(compound->GetOneLetterCode()),r->GetNumber()); + ost::mol::alg::CopyResidue(*r,dest_res,new_edi,lib); + } + } + } + ent = new_ent; + // Since we didn't do it in-place: reprocess the new entity + RuleBasedProcessor pr(lib); + pr.Process(ent); +} + +void ost::mol::alg::RemoveAtoms( + EntityHandle& ent, + CompoundLibPtr lib, + bool rm_unk_atoms, + bool rm_non_std, + bool rm_hyd_atoms, + bool rm_oxt_atoms, + bool rm_zero_occ_atoms, + bool colored /*=true*/){ + XCSEditor edi=ent.EditXCS(); + Diagnostics diags; + Checker checker(lib, ent, diags); + if (rm_zero_occ_atoms) { + std::cerr << "removing atoms with zero occupancy" << std::endl; + int zremoved=0; + AtomHandleList zero_atoms=checker.GetZeroOccupancy(); + for (AtomHandleList::const_iterator i=zero_atoms.begin(), e=zero_atoms.end(); i!=e; ++i) { + edi.DeleteAtom(*i); + zremoved++; + } + std::cerr << " --> removed " << zremoved << " hydrogen atoms" << std::endl; + } + + if (rm_hyd_atoms) { + std::cerr << "removing hydrogen atoms" << std::endl; + int hremoved=0; + AtomHandleList hyd_atoms=checker.GetHydrogens(); + for (AtomHandleList::const_iterator i=hyd_atoms.begin(), e=hyd_atoms.end(); i!=e; ++i) { + edi.DeleteAtom(*i); + hremoved++; + } + std::cerr << " --> removed " << hremoved << " hydrogen atoms" << std::endl; + } + + if (rm_oxt_atoms) { + std::cerr << "removing OXT atoms" << std::endl; + int oremoved=0; + AtomHandleList atoms=ent.GetAtomList(); + for (AtomHandleList::const_iterator i=atoms.begin(), e=atoms.end(); i!=e; ++i) { + if (i->GetName()=="OXT") { + edi.DeleteAtom(*i); + oremoved++; + } + } + std::cerr << " --> removed " << oremoved << " OXT atoms" << std::endl; + } + + checker.CheckForCompleteness(); + checker.CheckForUnknownAtoms(); + 
checker.CheckForNonStandard(); + for (Diagnostics::const_diag_iterator + j = diags.diags_begin(), e = diags.diags_end(); j != e; ++j) { + const Diag* diag=*j; + std::cerr << diag->Format(colored); + switch (diag->GetType()) { + case DIAG_UNK_ATOM: + if (rm_unk_atoms) { + edi.DeleteAtom(diag->GetAtom(0)); + std::cerr << " --> removed "; + } + break; + case DIAG_NONSTD_RESIDUE: + if (rm_non_std) { + edi.DeleteResidue(diag->GetResidue(0)); + std::cerr << " --> removed "; + } + break; + default: + break; + } + std::cerr << std::endl; + } +} + +void ost::mol::alg::CleanUpElementColumn(EntityHandle& ent, CompoundLibPtr lib){ + ChainHandleList chains=ent.GetChainList(); + for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { + ResidueHandleList residues = c->GetResidueList(); + for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { + CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); + AtomHandleList atoms=r->GetAtomList(); + if (!compound) { + for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { + j->SetElement(""); + } + continue; + } + for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { + int specindx=compound->GetAtomSpecIndex(j->GetName()); + if (specindx!=-1) { + j->SetElement(compound->GetAtomSpecs()[specindx].element); + } else { + j->SetElement(""); + } + } + } + } +} + +void ost::mol::alg::Molck( + ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib, + const ost::mol::alg::MolckSettings& settings=ost::mol::alg::MolckSettings()){ + if (settings.map_nonstd_res) { + ost::mol::alg::MapNonStandardResidues(ent, lib); + } + ost::mol::alg::RemoveAtoms(ent, + lib, + settings.rm_unk_atoms, + settings.rm_non_std, + settings.rm_hyd_atoms, + settings.rm_oxt_atoms, + settings.rm_zero_occ_atoms, + settings.colored); + if (settings.assign_elem) { + ost::mol::alg::CleanUpElementColumn(ent, lib); + } +} \ No newline at end of file diff --git 
a/modules/mol/alg/src/molck.hh b/modules/mol/alg/src/molck.hh new file mode 100644 index 0000000000000000000000000000000000000000..d61e0218678bd07fba0e828fdf0f901dd69977e5 --- /dev/null +++ b/modules/mol/alg/src/molck.hh @@ -0,0 +1,104 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_MOL_ALG_MOLCK_HH +#define OST_MOL_ALG_MOLCK_HH + +#include <string> +#include <ost/mol/entity_handle.hh> +#include <ost/conop/compound_lib.hh> + +namespace { + inline std::string BoolToString(bool b) + { + return b ? 
"True" : "False"; + } +} + +namespace ost { namespace mol{ namespace alg { + +struct MolckSettings; + +struct MolckSettings{ + + bool rm_unk_atoms; + bool rm_non_std; + bool rm_hyd_atoms; + bool rm_oxt_atoms; + bool rm_zero_occ_atoms; + bool colored; + bool map_nonstd_res; + bool assign_elem; + + MolckSettings(bool init_rm_unk_atoms=false, + bool init_rm_non_std=false, + bool init_rm_hyd_atoms=true, + bool init_rm_oxt_atoms=false, + bool init_rm_zero_occ_atoms=false, + bool init_colored=false, + bool init_map_nonstd_res=true, + bool init_assign_elem=true): + rm_unk_atoms(init_rm_unk_atoms), // Remove unknown and atoms not following the nomenclature + rm_non_std(init_rm_non_std), // Remove all residues not one of the 20 standard amino acids + rm_hyd_atoms(init_rm_hyd_atoms), // Remove hydrogen atoms + rm_oxt_atoms(init_rm_oxt_atoms), // Remove terminal oxygens + rm_zero_occ_atoms(init_rm_zero_occ_atoms), // Remove atoms with zero occupancy + colored(init_colored), // Whether the output should be colored + map_nonstd_res(init_map_nonstd_res), // Map non standard residues back to standard ones (e.g.: MSE->MET,SEP->SER,etc.) 
+ assign_elem(init_assign_elem){} // Clean up element column + + public: + std::string ToString(){ + std::string rep = "MolckSettings(rm_unk_atoms=" + BoolToString(rm_unk_atoms) + + ", rm_unk_atoms=" + BoolToString(rm_unk_atoms) + + ", rm_non_std=" + BoolToString(rm_non_std) + + ", rm_hyd_atoms=" + BoolToString(rm_hyd_atoms) + + ", rm_oxt_atoms=" + BoolToString(rm_oxt_atoms) + + ", rm_zero_occ_atoms=" + BoolToString(rm_zero_occ_atoms) + + ", colored=" + BoolToString(colored) + + ", map_nonstd_res=" + BoolToString(map_nonstd_res) + + ", assign_elem=" + BoolToString(assign_elem) + + ")"; + return rep; + } + +}; + +void MapNonStandardResidues(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib); + +void RemoveAtoms(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib, + bool rm_unk_atoms, + bool rm_non_std, + bool rm_hyd_atoms, + bool rm_oxt_atoms, + bool rm_zero_occ_atoms, + bool colored=true); + +void CleanUpElementColumn(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib); + +void Molck(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib, + const MolckSettings& settings); + + +}}} // namespace + +#endif diff --git a/modules/conop/src/nonstandard.cc b/modules/mol/alg/src/nonstandard.cc similarity index 99% rename from modules/conop/src/nonstandard.cc rename to modules/mol/alg/src/nonstandard.cc index eaf06680d087c05080f0e578828c7bad3fd68c66..b51c582adcdd81fb5b02cff834c18c218b7b317d 100644 --- a/modules/conop/src/nonstandard.cc +++ b/modules/mol/alg/src/nonstandard.cc @@ -32,7 +32,7 @@ using namespace ost::mol; using namespace ost; using namespace ost::conop; -namespace ost { namespace conop { +namespace ost { namespace mol { namespace alg { bool CopyResidue(ResidueHandle src_res, ResidueHandle dst_res, XCSEditor& edi) @@ -234,4 +234,4 @@ bool CopyNonConserved(ResidueHandle src_res, ResidueHandle dst_res, -}} +}}} diff --git a/modules/conop/src/nonstandard.hh b/modules/mol/alg/src/nonstandard.hh similarity index 78% rename from 
modules/conop/src/nonstandard.hh rename to modules/mol/alg/src/nonstandard.hh index a57022def8ef7436889491c7ae6714c6e9a69d6e..1dce74fd4c2cf102f985f06d22c2723e93053075 100644 --- a/modules/conop/src/nonstandard.hh +++ b/modules/mol/alg/src/nonstandard.hh @@ -17,30 +17,31 @@ // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA //------------------------------------------------------------------------------ -#ifndef OST_CONOP_NONSTANDARD_HH -#define OST_CONOP_NONSTANDARD_HH +#ifndef OST_MOL_ALG_NONSTANDARD_HH +#define OST_MOL_ALG_NONSTANDARD_HH /* Author: Marco Biasini, Juergen Haas */ #include "module_config.hh" -#include "compound_lib.hh" +#include <ost/conop/compound_lib.hh> -namespace ost { namespace conop { +namespace ost { namespace mol { namespace alg { /// \brief copies all atom of src_res to dst_res, gets compound lib from builder -bool DLLEXPORT_OST_CONOP CopyResidue(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyResidue(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi); /// \brief copies all atom of src_res to dst_res, requires a compound lib -bool DLLEXPORT_OST_CONOP CopyResidue(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyResidue(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, - ost::mol::XCSEditor& edi, CompoundLibPtr lib); + ost::mol::XCSEditor& edi, + ost::conop::CompoundLibPtr lib); /// \brief copies all atom of src_res to dst_res @@ -49,7 +50,7 @@ bool DLLEXPORT_OST_CONOP CopyResidue(ost::mol::ResidueHandle src_res, /// \param edi /// \param has_cbeta will be set to true if the src_res has a cbeta and the /// dst_residue is not a glycine -bool DLLEXPORT_OST_CONOP CopyIdentical(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyIdentical(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); @@ -63,10 +64,11 @@ bool DLLEXPORT_OST_CONOP CopyIdentical(ost::mol::ResidueHandle 
src_res, -bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyConserved(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, - bool& has_cbeta, CompoundLibPtr lib); + bool& has_cbeta, + ost::conop::CompoundLibPtr lib); /// \brief copies atoms of src_res to dst_res, requires compound lib /// @@ -77,7 +79,7 @@ bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, -bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyConserved(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); @@ -89,14 +91,14 @@ bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, /// only copied if dst_res is not equal to glycine. -bool DLLEXPORT_OST_CONOP CopyNonConserved(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyNonConserved(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); /// \brief construct dst_res from src_res when src_res is an MSE -bool DLLEXPORT_OST_CONOP CopyMSE(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyMSE(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); @@ -104,13 +106,14 @@ bool DLLEXPORT_OST_CONOP CopyMSE(ost::mol::ResidueHandle src_res, /// \brief construct a dst_res with only atoms matching the standard aminoacid /// from src_res when src_res is an is modified -bool DLLEXPORT_OST_CONOP CopyModified(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyModified(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, - bool& has_cbeta, CompoundLibPtr lib); + bool& has_cbeta, + ost::conop::CompoundLibPtr lib); -}} +}}} #endif diff --git a/modules/mol/alg/src/svd_superpose.cc b/modules/mol/alg/src/svd_superpose.cc index 
59adf6141d3fcab79dd11753127ea49833c101e7..b8f7759391399723eda64a3e2fb3c291c1cdbc68 100644 --- a/modules/mol/alg/src/svd_superpose.cc +++ b/modules/mol/alg/src/svd_superpose.cc @@ -113,6 +113,13 @@ Real CalculateRMSD(const mol::EntityView& ev1, const mol::EntityView& ev2, const geom::Mat4& transformation) { + if(ev1.GetAtomCount() != ev2.GetAtomCount()) { + std::stringstream ss; + ss << "Number of atoms in input views must be consistent! "; + ss << ev1.GetAtomCount() << " vs. " << ev2.GetAtomCount(); + throw Error(ss.str()); + } + return calc_rmsd_for_atom_lists(ev1.GetAtomList(), ev2.GetAtomList(), transformation); } diff --git a/modules/mol/alg/tests/CMakeLists.txt b/modules/mol/alg/tests/CMakeLists.txt index 0002d71492e639d21d456a93f94950db59aa1afe..0715910b1b9f9afac4de6e94169456ba21f80a3c 100644 --- a/modules/mol/alg/tests/CMakeLists.txt +++ b/modules/mol/alg/tests/CMakeLists.txt @@ -11,7 +11,8 @@ set(OST_MOL_ALG_UNIT_TESTS ) if (COMPOUND_LIB) - list(APPEND OST_MOL_ALG_UNIT_TESTS test_qsscoring.py) + list(APPEND OST_MOL_ALG_UNIT_TESTS test_qsscoring.py + test_nonstandard.py) endif() ost_unittest(MODULE mol_alg SOURCES "${OST_MOL_ALG_UNIT_TESTS}" LINK ost_io) diff --git a/modules/conop/tests/test_nonstandard.py b/modules/mol/alg/tests/test_nonstandard.py similarity index 78% rename from modules/conop/tests/test_nonstandard.py rename to modules/mol/alg/tests/test_nonstandard.py index 566db9a2b1902339828abaec67e7e35713fdf47a..c81c92479a4150c76aa89ff92409a4dc7656e25f 100644 --- a/modules/conop/tests/test_nonstandard.py +++ b/modules/mol/alg/tests/test_nonstandard.py @@ -1,5 +1,5 @@ import unittest -from ost import conop, io, mol +from ost import io, mol class TestNonStandard(unittest.TestCase): @@ -12,7 +12,7 @@ class TestNonStandard(unittest.TestCase): c=ed.InsertChain('A') ed.AppendResidue(c, 'SER') - err, has_cbeta=conop.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) 
self.assertTrue(err) self.assertTrue(has_cbeta) residues=new_hdl.residues @@ -36,16 +36,16 @@ class TestNonStandard(unittest.TestCase): ed.AppendResidue(c, 'GLY') ed.AppendResidue(c, 'GLY') ed.AppendResidue(c, 'HIS') - err, has_cbeta=conop.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) self.assertTrue(has_cbeta) self.assertTrue(err) - err, has_cbeta=conop.CopyConserved(tpl.residues[1], new_hdl.residues[1], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[1], new_hdl.residues[1], ed) self.assertFalse(has_cbeta) self.assertTrue(err) - err, has_cbeta=conop.CopyConserved(tpl.residues[2], new_hdl.residues[2], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[2], new_hdl.residues[2], ed) self.assertFalse(has_cbeta) self.assertTrue(err) - err, has_cbeta=conop.CopyConserved(tpl.residues[3], new_hdl.residues[3], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[3], new_hdl.residues[3], ed) self.assertTrue(has_cbeta) self.assertTrue(err) @@ -72,28 +72,28 @@ class TestNonStandard(unittest.TestCase): ed.AppendResidue(c, 'MET') # MET to MET - err =conop.CopyResidue(tpl.residues[0], new_hdl.residues[0], ed) + err =mol.alg.CopyResidue(tpl.residues[0], new_hdl.residues[0], ed) self.assertTrue(err) #GLY to GLY - err =conop.CopyResidue(tpl.residues[1], new_hdl.residues[1], ed) + err =mol.alg.CopyResidue(tpl.residues[1], new_hdl.residues[1], ed) self.assertTrue(err) # GLY to GLY - err =conop.CopyResidue(tpl.residues[2], new_hdl.residues[2], ed) + err =mol.alg.CopyResidue(tpl.residues[2], new_hdl.residues[2], ed) self.assertTrue(err) #now we copy a HIS to a HIS - err =conop.CopyResidue(tpl.residues[3], new_hdl.residues[3], ed) + err =mol.alg.CopyResidue(tpl.residues[3], new_hdl.residues[3], ed) self.assertTrue(err) # copy a GLY to a HIS - err, has_cbeta=conop.CopyNonConserved(tpl.residues[1], new_hdl.residues[4], ed) + err, has_cbeta=mol.alg.CopyNonConserved(tpl.residues[1], 
new_hdl.residues[4], ed) self.assertFalse(has_cbeta) # copy a MET to a GLY - err =conop.CopyResidue(tpl.residues[0], new_hdl.residues[5], ed) + err =mol.alg.CopyResidue(tpl.residues[0], new_hdl.residues[5], ed) self.assertFalse(err) # copy a MET to a HIS - err =conop.CopyResidue(tpl.residues[0], new_hdl.residues[6], ed) + err =mol.alg.CopyResidue(tpl.residues[0], new_hdl.residues[6], ed) self.assertFalse(err) # copy a GLY to a MET with adding CB - err=conop.CopyResidue(tpl.residues[1], new_hdl.residues[7], ed) + err=mol.alg.CopyResidue(tpl.residues[1], new_hdl.residues[7], ed) self.assertFalse(err) residues=new_hdl.residues diff --git a/modules/mol/alg/tests/test_qsscoring.py b/modules/mol/alg/tests/test_qsscoring.py index 17735209ed23910d30abf4d565dbb246ffbeb30a..c241eea49dea794a7f620014561a482019b3507a 100644 --- a/modules/mol/alg/tests/test_qsscoring.py +++ b/modules/mol/alg/tests/test_qsscoring.py @@ -9,6 +9,7 @@ except ImportError: "Ignoring test_qsscoring.py tests." sys.exit(0) +from ost.mol.alg import lDDTSettings def _LoadFile(file_name): """Helper to avoid repeating input path over and over.""" @@ -92,17 +93,17 @@ class TestQSscore(unittest.TestCase): ent_empty = ent.CreateEmptyView() qs_ent_invalid = QSscoreEntity(ent_empty) self.assertFalse(qs_ent_invalid.is_valid) - # monomer + # monomer - should be valid ent_mono = ent.Select('cname=A') - qs_ent_invalid = QSscoreEntity(ent_mono) - self.assertFalse(qs_ent_invalid.is_valid) + qs_ent_mono = QSscoreEntity(ent_mono) + self.assertTrue(qs_ent_mono.is_valid) # short chain removed ent_short = ent.Select('cname=A or rnum<20') - qs_ent_invalid = QSscoreEntity(ent_short) - self.assertFalse(qs_ent_invalid.is_valid) - self.assertEqual(sorted(qs_ent_invalid.removed_chains), ['B', '_']) + qs_ent_mono = QSscoreEntity(ent_short) + self.assertTrue(qs_ent_mono.is_valid) + self.assertEqual(sorted(qs_ent_mono.removed_chains), ['B', '_']) # non-AA chain removal - ent_non_AA = ent_extra.Select('cname=A,C,D') + ent_non_AA = 
ent_extra.Select('cname=C,D') qs_ent_invalid = QSscoreEntity(ent_non_AA) self.assertFalse(qs_ent_invalid.is_valid) self.assertEqual(sorted(qs_ent_invalid.removed_chains), ['C', 'D']) @@ -376,20 +377,30 @@ class TestQSscore(unittest.TestCase): # TEST EXTRA SCORES def test_lDDT(self): - # lDDT is not symmetrical and does not account for overprediction! + # check for penalized and unpenalized oligo lDDT ref = _LoadFile('4br6.1.pdb').Select('cname=A,B') mdl = _LoadFile('4br6.1.pdb') + lddt_settings = lDDTSettings() qs_scorer = QSscorer(ref, mdl) + lddt_oligo_scorer = qs_scorer.GetOligoLDDTScorer(lddt_settings, False) self.assertAlmostEqual(qs_scorer.global_score, 0.171, 2) self.assertAlmostEqual(qs_scorer.best_score, 1.00, 2) - self.assertAlmostEqual(qs_scorer.lddt_score, 1.00, 2) - self._CheckScorerLDDT(qs_scorer) + self.assertAlmostEqual(lddt_oligo_scorer.oligo_lddt, 1.00, 2) + # with penalty we account for extra model chains + lddt_oligo_scorer_pen = qs_scorer.GetOligoLDDTScorer(lddt_settings, True) + self.assertAlmostEqual(lddt_oligo_scorer_pen.oligo_lddt, 0.5213, 2) # flip them (use QSscoreEntity to go faster) - qs_scorer2 = QSscorer(qs_scorer.qs_ent_2, qs_scorer.qs_ent_1) + qs_scorer2 = QSscorer(qs_scorer.qs_ent_2, + qs_scorer.qs_ent_1, + res_num_alignment=True) + lddt_oligo_scorer2 = qs_scorer2.GetOligoLDDTScorer(lddt_settings, False) self.assertAlmostEqual(qs_scorer2.global_score, 0.171, 2) self.assertAlmostEqual(qs_scorer2.best_score, 1.00, 2) - self.assertAlmostEqual(qs_scorer2.lddt_score, 0.483, 2) - self._CheckScorerLDDT(qs_scorer) + # without penalty we don't see extra chains + self.assertAlmostEqual(lddt_oligo_scorer2.oligo_lddt, 1.00, 2) + # with penalty we account for extra reference chains + lddt_oligo_scorer2_pen = qs_scorer2.GetOligoLDDTScorer(lddt_settings, True) + self.assertAlmostEqual(lddt_oligo_scorer2_pen.oligo_lddt, 0.4496, 2) # check properties self.assertFalse(qs_scorer.calpha_only) self.assertEqual(qs_scorer.chem_mapping, {('B', 'A'): ('B', 
'C', 'D', 'A')}) @@ -663,33 +674,6 @@ class TestQSscore(unittest.TestCase): self.assertLessEqual(qs_scorer.global_score, 1.0) - def _CheckScorerLDDT(self, qs_scorer): - # check if we live up to our promises (assume: we did global and lddt score) - self._CheckScorer(qs_scorer) - # check lddt_mdl and lddt_ref - self.assertEqual(qs_scorer.lddt_mdl.chain_count, 1) - self.assertEqual(qs_scorer.lddt_ref.chain_count, 1) - # unique resnum? - resnum_mdl = [r.number.num for r in qs_scorer.lddt_mdl.residues] - resnum_mdl_set = set(resnum_mdl) - self.assertEqual(len(resnum_mdl), len(resnum_mdl_set)) - resnum_ref = [r.number.num for r in qs_scorer.lddt_ref.residues] - resnum_ref_set = set(resnum_ref) - self.assertEqual(len(resnum_ref), len(resnum_ref_set)) - # independent shared residues count from mapped_residues - num_shared = sum(len(v) for _,v in qs_scorer.mapped_residues.iteritems()) - shared_set = resnum_ref_set.intersection(resnum_mdl_set) - self.assertEqual(len(shared_set), num_shared) - # "lddt" prop on residues and B-factors? 
- for r in qs_scorer.lddt_mdl.residues: - if r.number.num in shared_set: - self.assertTrue(r.HasProp('lddt')) - r_lddt = r.GetFloatProp('lddt') - else: - r_lddt = 0 - self.assertTrue(all([a.b_factor == r_lddt for a in r.atoms])) - - if __name__ == "__main__": try: settings.Locate(('clustalw', 'clustalw2')) diff --git a/modules/conop/tests/testfiles/cbeta.pdb b/modules/mol/alg/tests/testfiles/cbeta.pdb similarity index 100% rename from modules/conop/tests/testfiles/cbeta.pdb rename to modules/mol/alg/tests/testfiles/cbeta.pdb diff --git a/modules/conop/tests/testfiles/sep.pdb b/modules/mol/alg/tests/testfiles/sep.pdb similarity index 100% rename from modules/conop/tests/testfiles/sep.pdb rename to modules/mol/alg/tests/testfiles/sep.pdb diff --git a/modules/mol/base/doc/entity.rst b/modules/mol/base/doc/entity.rst index 653625756a1b38c696a50da10840078fb4d74c58..6e956923bf33a9044cd3b38289c3df525b502bf2 100644 --- a/modules/mol/base/doc/entity.rst +++ b/modules/mol/base/doc/entity.rst @@ -118,7 +118,13 @@ The Handle Classes an enabled ``USE_NUMPY`` flag (see :ref:`here <cmake-flags>` for details). :type: :class:`numpy.array` - + + .. attribute:: valid + + Validity of handle. + + :type: bool + .. method:: GetName() :returns: Name associated to this entity. @@ -327,6 +333,10 @@ The Handle Classes :type radius: float :returns: :class:`AtomHandleList` (list of :class:`AtomHandle`) + + .. method:: IsValid() + + See :attr:`valid` .. class:: ChainHandle @@ -424,6 +434,12 @@ The Handle Classes :meth:`GetCenterOfAtoms`. :type: :class:`~ost.geom.Vec3` + + .. attribute:: valid + + Validity of handle. + + :type: bool .. method:: FindResidue(res_num) @@ -469,6 +485,10 @@ The Handle Classes See :attr:`description` + .. method:: IsValid() + + See :attr:`valid` + .. class:: ResidueHandle The residue is either used to represent complete molecules or building blocks @@ -620,6 +640,43 @@ The Handle Classes Residue index (starting at 0) within chain. + .. 
attribute:: central_atom + + Central atom used for rendering traces. For peptides, this is usually + the CA atom. For nucleotides, this is usually the P atom. + + :type: :class:`AtomHandle` + + .. attribute:: central_normal + + Normal computed for :attr:`central_atom`. Only defined for peptides and + nucleotides if all required atoms are available. Otherwise, the (1,0,0) vector + is returned. + + :type: :class:`~ost.geom.Vec3` + + .. attribute:: valid + + Validity of handle. + + :type: bool + + .. attribute:: next + + Residue after this one in the same chain. Invalid handle returned if there + is no next residue. Residues are ordered as in :attr:`ChainHandle.residues` + independently of whether they are connected or not (see + :func:`InSequence` to check for connected residues). + + :type: :class:`ResidueHandle` + + .. attribute:: prev + + Residue before this one in the same chain. Otherwise same behaviour as + :attr:`next`. + + :type: :class:`ResidueHandle` + .. method:: FindAtom(atom_name) Get atom by atom name. See also :attr:`atoms` @@ -666,7 +723,20 @@ The Handle Classes .. method:: GetIndex() See :attr:`index` + + .. method:: GetCentralAtom() + SetCentralAtom() + + See :attr:`central_atom` + + .. method:: GetCentralNormal() + + See :attr:`central_normal` + + .. method:: IsValid() + See :attr:`valid` + .. class:: AtomHandle @@ -732,6 +802,7 @@ The Handle Classes The atom's occupancy in the range 0 to 1. Read/write. Also available as :meth:`GetOccupancy`, :meth:`SetOccupancy`. + :type: float .. attribute:: b_factor @@ -782,6 +853,12 @@ The Handle Classes :type: int + .. attribute:: valid + + Validity of handle. + + :type: bool + .. method:: FindBondToAtom(other_atom) Finds and returns the bond formed between this atom and `other_atom`. If no @@ -899,8 +976,7 @@ The Handle Classes .. 
method:: IsValid() See :attr:`valid` - - :rtype: bool + The View Classes -------------------------------------------------------------------------------- @@ -981,6 +1057,12 @@ The View Classes :type: :class:`EntityHandle` + .. attribute:: valid + + Validity of view. + + :type: bool + .. method:: GetName() :returns: :func:`~EntityHandle.GetName` of entity :attr:`handle`. @@ -1027,8 +1109,8 @@ The View Classes :param chain_handle: The chain handle to be added. :type chain_handle: :class:`ChainHandle` - :param view_add_flags: An ORed together combination of :class:`ViewAddFlags` - :type view_add_flags: :class:`int` / :class:`ViewAddFlags` + :param view_add_flags: An ORed together combination of :class:`ViewAddFlag` + :type view_add_flags: :class:`int` / :class:`ViewAddFlag` :rtype: :class:`ChainView` .. method:: AddResidue(residue_handle[, view_add_flags]) @@ -1036,12 +1118,12 @@ The View Classes Add residue to view. If the residue's chain is not already part of the view, it will be added. By default, only the residue is added, but not its atoms. This behaviour can be modified by passing in an appropriate - combination of :class:`ViewAddFlags`. + combination of :class:`ViewAddFlag`. :param residue_handle: The residue handle to be added :type residue_handle: :class:`ResidueHandle` - :param view_add_flags: An ORed together combination of :class:`ViewAddFlags` - :type view_add_flags: :class:`int` / :class:`ViewAddFlags` + :param view_add_flags: An ORed together combination of :class:`ViewAddFlag` + :type view_add_flags: :class:`int` / :class:`ViewAddFlag` :rtype: :class:`ResidueView` .. 
method:: AddAtom(atom_handle[, view_add_flags]) @@ -1051,8 +1133,8 @@ The View Classes :param atom_handle: The atom handle :type atom_handle: :class:`AtomHandle` - :param view_add_flags: An ORed together combination of :class:`ViewAddFlags` - :type view_add_flags: :class:`int` / :class:`ViewAddFlags` + :param view_add_flags: An ORed together combination of :class:`ViewAddFlag` + :type view_add_flags: :class:`int` / :class:`ViewAddFlag` :rtype: :class:`AtomView` .. method:: AddBond(bond_handle) @@ -1199,7 +1281,8 @@ The View Classes .. method:: GetBondCount() Get number of bonds - :rtype: int + + :rtype: :class:`int` .. method:: GetBondList() @@ -1241,6 +1324,10 @@ The View Classes See :attr:`atom_count` + .. method:: IsValid() + + See :attr:`valid` + .. class:: ChainView A view representation of a :class:`ChainHandle`. Mostly, the same @@ -1350,7 +1437,7 @@ The View Classes .. attribute:: valid - Validity of handle. + Validity of view. :type: bool @@ -1362,8 +1449,8 @@ The View Classes :param atom_handle: The atom to be added :type atom_handle: :class:`AtomHandle` - :param view_add_flags: An ORed together combination of :class:`ViewAddFlags` - :type view_add_flags: :class:`int` / :class:`ViewAddFlags` + :param view_add_flags: An ORed together combination of :class:`ViewAddFlag` + :type view_add_flags: :class:`int` / :class:`ViewAddFlag` :rtype: :class:`AtomView` .. method:: AddResidue(residue_handle[, view_add_flags]) @@ -1371,12 +1458,12 @@ The View Classes Add residue to the view. If the atom does not belong to chain, the result is undefined. By default, only the residue, but no atoms are added to the view. To change the behavior, pass in a suitable combination of - :class:`ViewAddFlags`. + :class:`ViewAddFlag`. :param residue_handle: The residue handle to be added. 
:type residue_handle: :class:`ResidueHandle` - :param view_add_flags: An ORed together combination of :class:`ViewAddFlags` - :type view_add_flags: :class:`int` / :class:`ViewAddFlags` + :param view_add_flags: An ORed together combination of :class:`ViewAddFlag` + :type view_add_flags: :class:`int` / :class:`ViewAddFlag` :rtype: :class:`ResidueView` .. method:: FindAtom(res_num, atom_name) @@ -1627,8 +1714,8 @@ The View Classes :param atom_handle: Atom handle to be added :type atom_handle: :class:`AtomHandle` - :param flags: An ORed together combination of :class:`ViewAddFlags` - :type flags: :class:`int` / :class:`ViewAddFlags` + :param flags: An ORed together combination of :class:`ViewAddFlag` + :type flags: :class:`int` / :class:`ViewAddFlag` :rtype: :class:`AtomView` .. method:: GetCenterOfAtoms() @@ -1752,11 +1839,33 @@ Other Entity-Related Functions :returns: :class:`EntityHandle` +.. function:: InSequence(res, res_next) + + :return: True, if both *res* and *res_next* are :attr:`~ResidueHandle.valid`, + *res_next* is the residue following *res* (see + :attr:`ResidueHandle.next`), both residues are linking (i.e. + :attr:`~ChemClass.IsPeptideLinking` or + :attr:`~ChemClass.IsNucleotideLinking`) and are connected by an + appropriate bond. + :rtype: :class:`bool` + :param res: First residue to check. + :type res: :class:`ResidueHandle` + :param res_next: Second residue to check. + :type res_next: :class:`ResidueHandle` + +.. function:: BondExists(atom_a, atom_b) + + :return: True, if *atom_a* and *atom_b* are connected by a bond. + :rtype: :class:`bool` + :param atom_a: First atom to check. + :type atom_a: :class:`AtomHandle` + :param atom_b: Second atom to check. + :type atom_b: :class:`AtomHandle` Residue Numbering -------------------------------------------------------------------------------- -.. class:: ResNum(num, ins_code='\0') +.. class:: ResNum(num, ins_code='\\0') Number for a residue. 
The residue number has a numeric part and an (optional) insertion-code. You can work with this object as if it was an integer and @@ -1834,10 +1943,10 @@ here. :returns: :class:`str` -ViewAddFlags +ViewAddFlag -------------------------------------------------------------------------------- -.. class:: ViewAddFlags +.. class:: ViewAddFlag Defines flags controlling behaviour of routines adding handles to views: @@ -1849,6 +1958,8 @@ ViewAddFlags * ``CHECK_DUPLICATES`` - If set, it will be checked that no duplicates are created when adding a new handle + Flags can be ORed to combine them. + SecStructure -------------------------------------------------------------------------------- diff --git a/modules/mol/base/doc/query.rst b/modules/mol/base/doc/query.rst index 54380f93f0f8b952934e31ef8aea865b1b89cbd8..6cf82e2c44df6b821db9bc9b6021c7903ff974b5 100644 --- a/modules/mol/base/doc/query.rst +++ b/modules/mol/base/doc/query.rst @@ -16,7 +16,7 @@ selections in a convenient way. Selections are carried out mainly by calling the .. code-block:: python - arginines=model.Select('rname=ARG') + arginines = model.Select('rname=ARG') A simple selection query (called a predicate) consists of a property (here, `rname`), a comparison operator (here, `=`) and an argument (here, `ARG`). The @@ -24,7 +24,7 @@ return value of a call to the :meth:`EntityHandle.Select` method is always an :class:`EntityView`. The :class:`EntityView` always contains a full hierarchy of elements, never standalone separated elements. In the above example, the :class:`EntityView` called `arginines` will contain all chains from the -structure called 'model' that have at least one arginine. In turn these chains +structure called `model` that have at least one arginine. In turn these chains will contain all residues that have been identified as arginines. The residues themselves will contain references to all of their atoms. 
Of course, queries are not limited to selecting residues based on their type, it is also possible to @@ -32,9 +32,9 @@ select atom by name: .. code-block:: python - c_betas=model.Select('aname=CB') + c_betas = model.Select('aname=CB') -As before, c`betas is an instance of an :class:`EntityView` object and contains +As before, `c_betas` is an instance of an :class:`EntityView` object and contains a full hierarchy. The main difference to the previous example is that the selected residues do not contain a list of all of their atoms but only the C-beta. These examples clarify why the name 'view' was chosen for this result of @@ -45,8 +45,17 @@ Both the selection statements that have been used so far take strings as their a .. code-block:: python - n_term=model.Select('rnum<=20') - + n_term = model.Select('rnum<=20') + +If you want to supply arguments with special characters they need to be put in +quotation marks. For instance, this is needed to select the chain named "_" or +for any chain name containing ".", " " or ",". Hence, chain "_" can be selected +with: + +.. code-block:: python + + model.Select('cname="_"') + Combining predicates ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -60,25 +69,25 @@ Compact forms are available for several selection statements. For example, to se .. code-block:: python - arg_and_asn=model.Select('rname=ARG or rname=ASN') + arg_and_asn = model.Select('rname=ARG or rname=ASN') However, this is rather cumbersome as it requires the word `rname` to be typed twice. Since the only difference between the two parts of the selection is the argument that follows the word `rname`, the statement can also be written in an abbreviated form: .. code-block:: python - arg_and_asn=model.Select('rname=ARG,ASN') + arg_and_asn = model.Select('rname=ARG,ASN') Another example: to select residues with numbers in the range 130 to 200, one could use the following statement .. 
code-block:: python - center=model.Select('rnum>=130 and rnum<=200') + center = model.Select('rnum>=130 and rnum<=200') or alternatively use the much nicer syntax: .. code-block:: python - center=model.Select('rnum=130:200') + center = model.Select('rnum=130:200') This last statement is completely equivalent to the previous one. This syntax can be used when the selection statement requires a range of integer values @@ -91,25 +100,38 @@ The query .. code-block:: python - around_center=model.Select('5 <> {0,0,0}') + around_center = model.Select('5 <> {0,0,0}') -selects all chains, residues and atoms that lie with 5 Ã… to the origin of the reference system ({0,0,0}). The `<>` operator is called the ‘within’ operator. -Instead of a point, the within statements can also be used to return a view containing all chains, residues and atoms within a radius of another selection statement applied to the same entity. Square brackets are used to delimit the inner query statement. +selects all chains, residues and atoms that lie within 5 Å of the origin of the +reference system ({0,0,0}). The `<>` operator is called the 'within' operator. +Instead of a point, the within statements can also be used to return a view +containing all chains, residues and atoms within a radius of another selection +statement applied to the same entity. Square brackets are used to delimit the +inner query statement. .. code-block:: python - around_hem=model.Select('5 <> [rname=HEM]') + around_hem = model.Select('5 <> [rname=HEM]') model.Select('5 <> [rname=HEM and ele=C] and rname!=HEM') Bonds and Queries ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -When an :class:`EntityView` is generated by a selection, it includes by default only bonds for which both connected atoms satisfy the query statement. This can be changed by passing the parameters `EXCLUSIVE_BONDS` or `NO_BONDS` when calling the Select method. 
`EXCLUSIVE_BONDS` adds bonds to the :class:`EntityView` when at least one of the two atoms falls within the boundary of the selection. `NO_BONDS` suppresses the bond inclusion step completely. +When an :class:`EntityView` is generated by a selection, it includes by default +only bonds for which both connected atoms satisfy the query statement. This can +be changed by passing the parameters `EXCLUSIVE_BONDS` or `NO_BONDS` when +calling the Select method. `EXCLUSIVE_BONDS` adds bonds to the +:class:`EntityView` when at least one of the two atoms falls within the boundary +of the selection. `NO_BONDS` suppresses the bond inclusion step completely. Whole Residue Queries ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If the parameter `MATCH_RESIDUES` is passed when the Select method is called, the resulting :class:`EntityView` will include whole residues for which at least one atom satisfies the query. This means that if at least one atom in the residue falls within the boundaries of the selection, all atoms of the residue will be included in the View. +If the parameter `MATCH_RESIDUES` is passed when the Select method is called, +the resulting :class:`EntityView` will include whole residues for which at least +one atom satisfies the query. This means that if at least one atom in the +residue falls within the boundaries of the selection, all atoms of the residue +will be included in the View. More Query Usage ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -141,9 +163,9 @@ they are defined. 
Therefore, all generic properties start with a `g`, followed b chain_handle.SetIntProp("testpropchain", 10) # query statements - sel_a=e.Select("gatestpropatom<=10.0") - sel_r=e.Select("grtestpropres=1.0") - sel_c=e.Select("gctestpropchain>5") + sel_a = e.Select("gatestpropatom<=10.0") + sel_r = e.Select("grtestpropres=1.0") + sel_c = e.Select("gctestpropchain>5") Since generic properties do not need to be defined for all parts of an entity (e.g. it could be specified for one single :class:`AtomHandle`), the query @@ -154,11 +176,11 @@ statement which can be done using a ':' character: # if one or more atoms have no generic properties - sel=e.Select("gatestprop=5") + sel = e.Select("gatestprop=5") # this will throw an error # you can specify a default value: - sel=e.Select("gatestprop:1.0=5") + sel = e.Select("gatestprop:1.0=5") # this will run through smoothly and use 1.0 as # the default value for all atoms that do not # have the generic property 'testprop' diff --git a/modules/mol/base/pymod/export_entity_view.cc b/modules/mol/base/pymod/export_entity_view.cc index 2422c154ad6a9f6e764a824264066b550807d9b3..19781a94f2ae3e6019d7e57b0eb55e6119b3d1e2 100644 --- a/modules/mol/base/pymod/export_entity_view.cc +++ b/modules/mol/base/pymod/export_entity_view.cc @@ -197,10 +197,10 @@ void export_EntityView() make_function(&EntityView::GetBondList, return_value_policy<reference_existing_object>())) .def("GetChainList", &EntityView::GetChainList, - return_value_policy<reference_existing_object>()) + return_value_policy<copy_const_reference>()) .add_property("chains", make_function(&EntityView::GetChainList, - return_value_policy<reference_existing_object>())) + return_value_policy<copy_const_reference>())) .def(self==self) .def(self!=self) .def("Dump", &EntityView::Dump) diff --git a/modules/mol/base/src/bond_handle.cc b/modules/mol/base/src/bond_handle.cc index 3be1b88bd7017c7005e2912a71d689bb398576bc..9a68a860dc5db33eea4a28fdc807407acb889ec1 100644 --- 
a/modules/mol/base/src/bond_handle.cc +++ b/modules/mol/base/src/bond_handle.cc @@ -47,7 +47,7 @@ BondHandle::operator bool() const } bool BondHandle::IsValid() const { - return impl_; + return static_cast<bool>(impl_); } AtomHandle BondHandle::GetFirst() const diff --git a/modules/mol/base/src/entity_view.cc b/modules/mol/base/src/entity_view.cc index bb585270e932298e7bc4ca3ce340c0872177437e..c5b9c2ca5c4488c8cbd4f7184faa982d4375554d 100644 --- a/modules/mol/base/src/entity_view.cc +++ b/modules/mol/base/src/entity_view.cc @@ -322,7 +322,7 @@ EntityView EntityView::Select(const Query& query, QueryFlags flags) const tribool c=qs.EvalChain(src_chain.Impl()); if (c==true) { // add everything , i.e. all residues and atoms of this chain. - // We can't use AddChain(chain, AddViewFlags::INCLUDE_ALL) since we need + // We can't use AddChain(chain, ViewAddFlag::INCLUDE_ALL) since we need // to keep track of all atoms added to get the bonds right. ChainView dst_chain=view.AddChain(src_chain.GetHandle()); const ResidueViewList& residues=src_chain.GetResidueList(); diff --git a/modules/mol/base/src/impl/atom_impl.cc b/modules/mol/base/src/impl/atom_impl.cc index 7bc585b14662360f400d241ca77e4a48ee8b1184..f0852b73845a7ae3181109154203f40a3435d64c 100644 --- a/modules/mol/base/src/impl/atom_impl.cc +++ b/modules/mol/base/src/impl/atom_impl.cc @@ -227,7 +227,7 @@ std::ostream& operator<<(std::ostream& o, const AtomImplPtr ap) } bool ConnectorExists(const AtomImplPtr& a, const AtomImplPtr& b) { - return GetConnector(a, b); + return static_cast<bool>(GetConnector(a, b)); } ConnectorImplP GetConnector(const AtomImplPtr& a, const AtomImplPtr& b) { diff --git a/modules/mol/base/src/impl/residue_impl.cc b/modules/mol/base/src/impl/residue_impl.cc index fe1e7f60b0448d79e661fabc2b4cbe2630cd58ae..014f4069f5cae65e808d25151db5cee4ef10f213 100644 --- a/modules/mol/base/src/impl/residue_impl.cc +++ b/modules/mol/base/src/impl/residue_impl.cc @@ -212,12 +212,12 @@ AtomImplPtr 
ResidueImpl::GetCentralAtom() const for (AtomImplList::const_iterator it=atom_list_.begin(); it!=atom_list_.end();++it) { if((*it)->Name()=="P") return *it; - } + } } else if (chem_class_.IsPeptideLinking()) { for (AtomImplList::const_iterator it=atom_list_.begin(); it!=atom_list_.end();++it) { if((*it)->Name()=="CA") return *it; - } + } } return AtomImplPtr(); @@ -266,18 +266,21 @@ geom::Vec3 ResidueImpl::GetCentralNormal() const geom::Vec3 nrvo(1,0,0); if (chem_class_.IsPeptideLinking()) { AtomImplPtr a1 = FindAtom("C"); - AtomImplPtr a2 = FindAtom("O"); + AtomImplPtr a2 = FindAtom("O"); if(a1 && a2) { nrvo = geom::Normalize(a2->TransformedPos()-a1->TransformedPos()); } else { a1 = FindAtom("CB"); - a2 = FindAtom("CA"); + a2 = FindAtom("CA"); if(a1 && a2) { nrvo = geom::Normalize(a2->TransformedPos()-a1->TransformedPos()); } else { - geom::Vec3 v0=GetCentralAtom()->TransformedPos(); - nrvo=geom::Cross(geom::Normalize(v0), - geom::Normalize(geom::Vec3(-v0[2],v0[0],v0[1]))); + AtomImplPtr a0 = GetCentralAtom(); + if (a0) { + geom::Vec3 v0 = a0->TransformedPos(); + nrvo = geom::Cross(geom::Normalize(v0), + geom::Normalize(geom::Vec3(-v0[2], v0[0], v0[1]))); + } LOG_VERBOSE("warning: could not find atoms for proper central normal calculation"); } } @@ -288,9 +291,12 @@ geom::Vec3 ResidueImpl::GetCentralNormal() const if(a1 && a2 && a3) { nrvo = geom::Normalize(a1->TransformedPos()-(a2->TransformedPos()+a3->TransformedPos())*.5); } else { - geom::Vec3 v0=GetCentralAtom()->TransformedPos(); - nrvo=geom::Cross(geom::Normalize(v0), - geom::Normalize(geom::Vec3(-v0[2],v0[0],v0[1]))); + AtomImplPtr a0 = GetCentralAtom(); + if (a0) { + geom::Vec3 v0 = a0->TransformedPos(); + nrvo = geom::Cross(geom::Normalize(v0), + geom::Normalize(geom::Vec3(-v0[2], v0[0], v0[1]))); + } LOG_VERBOSE("warning: could not find atoms for proper central normal calculation"); } } diff --git a/modules/mol/base/src/surface_handle.hh b/modules/mol/base/src/surface_handle.hh index 
3524375d36ab919f072d3a8d19fe78d4e74d387b..a11b3a91e9768535716c30e25f5026e8fdc04c44 100644 --- a/modules/mol/base/src/surface_handle.hh +++ b/modules/mol/base/src/surface_handle.hh @@ -70,7 +70,7 @@ public: // flip normals void Invert(); - bool IsValid() const {return impl_;} + bool IsValid() const {return static_cast<bool>(impl_);} bool operator==(const SurfaceHandle& ref) const { return impl_==ref.impl_; } diff --git a/modules/mol/base/tests/test_residue.cc b/modules/mol/base/tests/test_residue.cc index 89e8da33914110dc1d50150fb9dc19e2ab3dd50e..d828544b7eba1ef47d5c995b4341b438e68e8c9d 100644 --- a/modules/mol/base/tests/test_residue.cc +++ b/modules/mol/base/tests/test_residue.cc @@ -125,4 +125,92 @@ BOOST_AUTO_TEST_CASE(rename_res) BOOST_CHECK_EQUAL(rA2B.GetName(), "B"); } +BOOST_AUTO_TEST_CASE(test_centralatom) +{ + // COOK UP ENTITY FOR TEST + EntityHandle eh = CreateEntity(); + XCSEditor e = eh.EditXCS(); + ChainHandle ch = e.InsertChain("A"); + // decent peptide with all entries + ResidueHandle rp1 = e.AppendResidue(ch, "A"); + e.InsertAtom(rp1, "CA", geom::Vec3(2, 0, 0)); + e.InsertAtom(rp1, "CB", geom::Vec3(1, 0, 0)); + e.InsertAtom(rp1, "C", geom::Vec3(0, 0, 0)); + e.InsertAtom(rp1, "O", geom::Vec3(0, 1, 0)); + rp1.SetChemClass(ChemClass(ChemClass::PEPTIDE_LINKING)); + // weird peptide with only CA and CB + ResidueHandle rp2 = e.AppendResidue(ch, "B"); + e.InsertAtom(rp2, "CA", geom::Vec3(3, 0, 0)); + e.InsertAtom(rp2, "CB", geom::Vec3(3, 1, 0)); + rp2.SetChemClass(ChemClass(ChemClass::PEPTIDE_LINKING)); + // CA-only peptide + ResidueHandle rp3 = e.AppendResidue(ch, "C"); + e.InsertAtom(rp3, "CA", geom::Vec3(4, 0, 0)); + rp3.SetChemClass(ChemClass(ChemClass::PEPTIDE_LINKING)); + // peptide with custom atoms + ResidueHandle rp4 = e.AppendResidue(ch, "D"); + AtomHandle rp4_ax = e.InsertAtom(rp4, "XX", geom::Vec3(5, 0, 0)); + rp4.SetChemClass(ChemClass(ChemClass::PEPTIDE_LINKING)); + // nucleotide with all needed entries + ResidueHandle rn1 = 
e.AppendResidue(ch, "E"); + e.InsertAtom(rn1, "P", geom::Vec3(6, 0, 0)); + e.InsertAtom(rn1, "OP1", geom::Vec3(6, 0.5, 0)); + e.InsertAtom(rn1, "OP2", geom::Vec3(6, 1.5, 0)); + rn1.SetChemClass(ChemClass(ChemClass::DNA_LINKING)); + // nucleotide with only P + ResidueHandle rn2 = e.AppendResidue(ch, "F"); + e.InsertAtom(rn2, "P", geom::Vec3(7, 0, 0)); + rn2.SetChemClass(ChemClass(ChemClass::DNA_LINKING)); + // nucleotide with custom atoms + ResidueHandle rn3 = e.AppendResidue(ch, "G"); + AtomHandle rn3_ax = e.InsertAtom(rn3, "XX", geom::Vec3(8, 0, 0)); + rn3.SetChemClass(ChemClass(ChemClass::DNA_LINKING)); + // unknown chem class + ResidueHandle ru = e.AppendResidue(ch, "H"); + e.InsertAtom(ru, "P", geom::Vec3(9, 0, 0)); + e.InsertAtom(ru, "CA", geom::Vec3(9, 1, 0)); + AtomHandle ru_ax = e.InsertAtom(ru, "XX", geom::Vec3(9, 2, 0)); + ru.SetChemClass(ChemClass(ChemClass::UNKNOWN)); + + // CHECK CENTRAL ATOMS + BOOST_CHECK(rp1.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(rp1.GetCentralAtom().GetQualifiedName(), "A.A1.CA"); + BOOST_CHECK(rp2.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(rp2.GetCentralAtom().GetQualifiedName(), "A.B2.CA"); + BOOST_CHECK(rp3.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(rp3.GetCentralAtom().GetQualifiedName(), "A.C3.CA"); + BOOST_CHECK(!rp4.GetCentralAtom().IsValid()); + BOOST_CHECK(rn1.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(rn1.GetCentralAtom().GetQualifiedName(), "A.E5.P"); + BOOST_CHECK(rn2.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(rn2.GetCentralAtom().GetQualifiedName(), "A.F6.P"); + BOOST_CHECK(!rn3.GetCentralAtom().IsValid()); + BOOST_CHECK(!ru.GetCentralAtom().IsValid()); + + // CHECK NORMALS + BOOST_CHECK_EQUAL(rp1.GetCentralNormal(), geom::Vec3(0, 1, 0)); + BOOST_CHECK_EQUAL(rp2.GetCentralNormal(), geom::Vec3(0, -1, 0)); + BOOST_CHECK_EQUAL(rp3.GetCentralNormal(), geom::Vec3(0, 0, 1)); + BOOST_CHECK_EQUAL(rp4.GetCentralNormal(), geom::Vec3(1, 0, 0)); + BOOST_CHECK_EQUAL(rn1.GetCentralNormal(), 
geom::Vec3(0, -1, 0)); + BOOST_CHECK_EQUAL(rn2.GetCentralNormal(), geom::Vec3(0, 0, 1)); + BOOST_CHECK_EQUAL(rn3.GetCentralNormal(), geom::Vec3(1, 0, 0)); + BOOST_CHECK_EQUAL(ru.GetCentralNormal(), geom::Vec3(1, 0, 0)); + + // CHECK SETTING CENTRAL ATOMS + rp4.SetCentralAtom(rp4_ax); + BOOST_CHECK(rp4.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(rp4.GetCentralAtom().GetQualifiedName(), "A.D4.XX"); + BOOST_CHECK_EQUAL(rp4.GetCentralNormal(), geom::Vec3(0, 0, 1)); + rn3.SetCentralAtom(rn3_ax); + BOOST_CHECK(rn3.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(rn3.GetCentralAtom().GetQualifiedName(), "A.G7.XX"); + BOOST_CHECK_EQUAL(rn3.GetCentralNormal(), geom::Vec3(0, 0, 1)); + ru.SetCentralAtom(ru_ax); + BOOST_CHECK(ru.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(ru.GetCentralAtom().GetQualifiedName(), "A.H8.XX"); + // no normal for unknown residues + BOOST_CHECK_EQUAL(ru.GetCentralNormal(), geom::Vec3(1, 0, 0)); +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/mol/mm/src/simulation.cc b/modules/mol/mm/src/simulation.cc index 2a23ddb9a629a112b3301c35dd6756ad2ba08b92..dea985213bf993a1a49f0ec06c17f7b34d4e36fe 100644 --- a/modules/mol/mm/src/simulation.cc +++ b/modules/mol/mm/src/simulation.cc @@ -27,10 +27,10 @@ Simulation::Simulation(const ost::mol::EntityHandle& handle, //note, that ent_ will be "completed" inside this function! 
//(hydrogens and shit) - - ent_ = handle.Copy(); - TopologyPtr top = TopologyCreator::Create(ent_,settings); - this->Init(top, settings); + + ost::mol::EntityHandle ent = handle.Copy(); + TopologyPtr top = TopologyCreator::Create(ent,settings); + this->Init(top, ent, settings); } Simulation::Simulation(const TopologyPtr top, @@ -40,21 +40,16 @@ Simulation::Simulation(const TopologyPtr top, if(static_cast<uint>(handle.GetAtomCount()) != top->GetNumParticles()){ throw ost::Error("Number of atoms in entity must be consistent with number of particles in topology!"); } - ent_ = handle.Copy(); - this->Init(top, settings); + ost::mol::EntityHandle ent = handle.Copy(); + this->Init(top, ent, settings); } void Simulation::Save(const String& filename){ + std::ofstream stream(filename.c_str(), std::ios_base::binary); io::BinaryDataSink ds(stream); + ds << *top_; - geom::Vec3List positions = this->GetPositions(false,false); - for(geom::Vec3List::iterator i = positions.begin(); - i != positions.end(); ++i){ - ds & (*i)[0]; - ds & (*i)[1]; - ds & (*i)[2]; - } uint num_chains; uint num_residues; @@ -96,15 +91,11 @@ void Simulation::Save(const String& filename){ k != atom_list.end(); ++k){ atom_name = k->GetName(); atom_element = k->GetElement(); - geom::Vec3 pos = k->GetPos(); bfac = k->GetBFactor(); occ = k->GetOccupancy(); is_hetatm = k->IsHetAtom(); ds & atom_name; ds & atom_element; - ds & pos[0]; - ds & pos[1]; - ds & pos[2]; ds & bfac; ds & occ; ds & is_hetatm; @@ -112,19 +103,18 @@ void Simulation::Save(const String& filename){ } } - ost::mol::AtomHandleList atom_list = ent_.GetAtomList(); ost::mol::AtomHandleList bonded_atoms; std::map<long,int> atom_indices; int actual_index = 0; - for(ost::mol::AtomHandleList::const_iterator i = atom_list.begin(), e = atom_list.end(); - i != e; ++i){ + for(ost::mol::AtomHandleList::const_iterator i = atom_list_.begin(), + e = atom_list_.end(); i != e; ++i){ atom_indices[i->GetHashCode()] = actual_index; ++actual_index; } - 
for(ost::mol::AtomHandleList::iterator i = atom_list.begin(); - i != atom_list.end(); ++i){ + for(ost::mol::AtomHandleList::iterator i = atom_list_.begin(); + i != atom_list_.end(); ++i){ bonded_atoms = i->GetBondPartners(); num_bonded_atoms = bonded_atoms.size(); ds & num_bonded_atoms; @@ -139,6 +129,7 @@ void Simulation::Save(const String& filename){ } SimulationPtr Simulation::Load(const String& filename, SettingsPtr settings){ + if (!boost::filesystem::exists(filename)) { std::stringstream ss; ss << "Could not open simulation File '" @@ -146,71 +137,17 @@ SimulationPtr Simulation::Load(const String& filename, SettingsPtr settings){ throw ost::io::IOException(ss.str()); } - SimulationPtr sim_ptr(new Simulation); - std::ifstream stream(filename.c_str(), std::ios_base::binary); io::BinaryDataSource ds(stream); - TopologyPtr top_p(new Topology); - ds >> *top_p; - - sim_ptr->top_ = top_p; - - sim_ptr->system_ = SystemCreator::Create(sim_ptr->top_,settings, - sim_ptr->system_force_mapper_); - - sim_ptr->integrator_ = settings->integrator; - - OpenMM::Platform::loadPluginsFromDirectory (settings->openmm_plugin_directory); - OpenMM::Platform::loadPluginsFromDirectory (settings->custom_plugin_directory); - OpenMM::Platform* platform; - - switch(settings->platform){ - case Reference:{ - platform = &OpenMM::Platform::getPlatformByName("Reference"); - break; - } - case OpenCL:{ - platform = &OpenMM::Platform::getPlatformByName("OpenCL"); - break; - } - case CUDA:{ - platform = &OpenMM::Platform::getPlatformByName("CUDA"); - break; - } - case CPU:{ - platform = &OpenMM::Platform::getPlatformByName("CPU"); - break; - } - default:{ - throw ost::Error("Invalid Platform when Loading simulation!"); - } - } - - sim_ptr->context_ = ContextPtr(new OpenMM::Context(*(sim_ptr->system_), - *(sim_ptr->integrator_), - *platform)); - std::vector<OpenMM::Vec3> positions; - OpenMM::Vec3 open_mm_vec; - Real a,b,c; - for(int i = 0; i < sim_ptr->system_->getNumParticles(); ++i){ - ds & a; - ds 
& b; - ds & c; - open_mm_vec[0] = a; - open_mm_vec[1] = b; - open_mm_vec[2] = c; - positions.push_back(open_mm_vec); - } - sim_ptr->context_->setPositions(positions); + SimulationPtr sim_ptr(new Simulation); + TopologyPtr top(new Topology); + ds >> *top; uint num_chains; uint num_residues; uint num_atoms; uint num_bonded_atoms; - Real x_pos; - Real y_pos; - Real z_pos; Real bfac; Real occ; bool is_hetatm; @@ -239,29 +176,29 @@ SimulationPtr Simulation::Load(const String& filename, SettingsPtr settings){ for(uint k = 0; k < num_atoms; ++k){ ds & atom_name; ds & atom_element; - ds & x_pos; - ds & y_pos; - ds & z_pos; ds & bfac; ds & occ; ds & is_hetatm; - geom::Vec3 pos(x_pos,y_pos,z_pos); - ed.InsertAtom(res,atom_name,pos,atom_element,occ,bfac,is_hetatm); + ed.InsertAtom(res, atom_name, geom::Vec3(0.0,0.0,0.0), + atom_element, occ, bfac, is_hetatm); } } } - ost::mol::AtomHandleList atom_list = ent.GetAtomList(); - for(uint i = 0; i < atom_list.size(); ++i){ + + sim_ptr->Init(top, ent, settings); + + for(uint i = 0; i < sim_ptr->atom_list_.size(); ++i){ ds & num_bonded_atoms; for(uint j = 0; j < num_bonded_atoms; ++j){ ds & atom_index; - ed.Connect(atom_list[i],atom_list[atom_index]); + ed.Connect(sim_ptr->atom_list_[i], sim_ptr->atom_list_[atom_index]); } } - sim_ptr->ent_ = ent; - + // also loads the positions that have been set in the context + // they get mapped over to the attached entity sim_ptr->context_->loadCheckpoint(stream); + sim_ptr->UpdatePositions(); return sim_ptr; } @@ -297,10 +234,12 @@ void Simulation::EnsurePluginsLoaded(const String& plugin_path) { void Simulation::Init(const TopologyPtr top, + const ost::mol::EntityHandle& ent, const SettingsPtr settings){ - top_ = top; + ent_ = ent; + atom_list_ = ent_.GetAtomList(); if(!settings->integrator){ //user did not specify an integrator, so let's just use a standard integrator @@ -358,12 +297,11 @@ void Simulation::Init(const TopologyPtr top, context_ = ContextPtr(new 
OpenMM::Context(*system_,*integrator_,*platform,context_properties)); - ost::mol::AtomHandleList atom_list = ent_.GetAtomList(); std::vector<OpenMM::Vec3> positions; geom::Vec3 ost_vec; OpenMM::Vec3 open_mm_vec; - for(ost::mol::AtomHandleList::iterator i = atom_list.begin(); - i!=atom_list.end();++i){ + for(ost::mol::AtomHandleList::iterator i = atom_list_.begin(); + i!=atom_list_.end();++i){ ost_vec = i->GetPos(); open_mm_vec[0] = ost_vec[0]/10; open_mm_vec[1] = ost_vec[1]/10; @@ -446,14 +384,12 @@ void Simulation::UpdatePositions(bool enforce_periodic_box){ if(top_->GetNumParticles() != static_cast<uint>(ent_.GetAtomCount())){ throw ost::Error("Num particles in topology and num atoms in entity are not consistent!"); } - geom::Vec3List positions = this->GetPositions(enforce_periodic_box, true); + geom::Vec3List positions; + StateExtractor::ExtractPositions(context_, positions, enforce_periodic_box, + true); ost::mol::XCSEditor ed = ent_.EditXCS(ost::mol::BUFFERED_EDIT); - ost::mol::AtomHandleList atom_list = ent_.GetAtomList(); - ost::mol::AtomHandleList::iterator a = atom_list.begin(); - ost::mol::AtomHandleList::iterator ae = atom_list.end(); - geom::Vec3List::iterator v = positions.begin(); - for(; a != ae; ++a, ++v){ - ed.SetAtomPos(*a,*v); + for(uint i = 0; i < atom_list_.size(); ++i) { + ed.SetAtomPos(atom_list_[i], positions[i]); } } diff --git a/modules/mol/mm/src/simulation.hh b/modules/mol/mm/src/simulation.hh index 89c58b10eb96f8b2b0a5995986d2ea80a47a43b7..7cbf06d1aededa101f55658b14f50f42d73dd6ca 100644 --- a/modules/mol/mm/src/simulation.hh +++ b/modules/mol/mm/src/simulation.hh @@ -143,6 +143,7 @@ private: Simulation() { } //hidden constructor... 
void Init(const ost::mol::mm::TopologyPtr top, + const ost::mol::EntityHandle& ent, const SettingsPtr settings); int TimeToNextNotification(); @@ -160,6 +161,7 @@ private: std::vector<int> time_to_notify_; std::map<FuncType,uint> system_force_mapper_; ost::mol::EntityHandle ent_; + ost::mol::AtomHandleList atom_list_; }; }}} //ns diff --git a/modules/seq/alg/pymod/renumber.py b/modules/seq/alg/pymod/renumber.py index 9f6dd02d56e9a1ff55c7c2c5e89ed6745f199af5..434732dc865f1c584c73079ecd456b17c5f8a0fb 100644 --- a/modules/seq/alg/pymod/renumber.py +++ b/modules/seq/alg/pymod/renumber.py @@ -1,6 +1,6 @@ from ost import seq, mol -def _RenumberSeq(seq_handle): +def _RenumberSeq(seq_handle, old_number_label=None): if not seq_handle.HasAttachedView(): raise RuntimeError("Sequence Handle has no attached view") ev = seq_handle.attached_view.CreateEmptyView() @@ -11,11 +11,13 @@ def _RenumberSeq(seq_handle): if r.IsValid(): ev.AddResidue(r, mol.INCLUDE_ALL) new_numbers.append(pos+1) + if old_number_label is not None: + r.SetIntProp(old_number_label, r.number.GetNum()) else: raise RuntimeError('Error: renumbering failed at position %s' % pos) return ev, new_numbers -def _RenumberAln(aln, seq_index): +def _RenumberAln(aln, seq_index, old_number_label=None): if not aln.sequences[seq_index].HasAttachedView(): raise RuntimeError("Sequence Handle has no attached view") counter=0 @@ -34,11 +36,13 @@ def _RenumberAln(aln, seq_index): % (counter)) ev.AddResidue(r, mol.INCLUDE_ALL) new_numbers.append(counter+1) + if old_number_label is not None: + r.SetIntProp(old_number_label, r.number.GetNum()) counter += 1 return ev, new_numbers -def Renumber(seq_handle, sequence_number_with_attached_view=1): +def Renumber(seq_handle, sequence_number_with_attached_view=1, old_number_label=None): """ Function to renumber an entity according to an alignment between the model sequence and the full-length target sequence. 
The aligned model sequence or @@ -70,9 +74,9 @@ def Renumber(seq_handle, sequence_number_with_attached_view=1): """ if isinstance(seq_handle, seq.SequenceHandle) \ or isinstance(seq_handle, seq.ConstSequenceHandle): - ev, new_numbers = _RenumberSeq(seq_handle) + ev, new_numbers = _RenumberSeq(seq_handle, old_number_label) elif isinstance(seq_handle, seq.AlignmentHandle): - ev, new_numbers = _RenumberAln(seq_handle, sequence_number_with_attached_view) + ev, new_numbers = _RenumberAln(seq_handle, sequence_number_with_attached_view, old_number_label) else: raise RuntimeError("Unknown input type " + str(type(seq_handle))) diff --git a/scripts/ost.in b/scripts/ost.in index f610b2446b4cddb8591d528cf2633879db0bd656..a20bef97b4fea5f3b1d98690aadea2c2d83f5677 100755 --- a/scripts/ost.in +++ b/scripts/ost.in @@ -28,9 +28,39 @@ else SCRIPT_NAME="$0" fi BIN_DIR=`dirname "$SCRIPT_NAME"` +OST_EXEC_DIR=$(cd $BIN_DIR/../@LIBEXEC_PATH@ && pwd) +export OST_EXEC_DIR -source "$BIN_DIR/../@LIBEXEC_PATH@/ost_config" +source "$OST_EXEC_DIR/ost_config" -$pyexec $interactive -c "execfile('$DNG_ROOT/@LIBDIR@/python@PYTHON_VERSION@/site-packages/ost/ost_startup.py')" $opts -RC=$? +ACTION="$1" +OST_SCRIPT="${OST_EXEC_DIR}/ost-${ACTION}" + +OLDIFS=$IFS +if test -e "${OST_SCRIPT}" ; then + opts="" + for argument in "${@:2}";do + if [ -n "$opts" ]; then + opts=$opts"#""$argument" + else + opts="$argument" + fi + done + IFS="#" + $pyexec -c "execfile('$DNG_ROOT/@LIBDIR@/python@PYTHON_VERSION@/site-packages/ost/ost_startup.py')" "${OST_SCRIPT}" $opts + RC=$? +else + opts="" + for argument in "$@";do + if [ -n "$opts" ]; then + opts=$opts"#""$argument" + else + opts="$argument" + fi + done + IFS="#" + $pyexec $interactive -c "execfile('$DNG_ROOT/@LIBDIR@/python@PYTHON_VERSION@/site-packages/ost/ost_startup.py')" $opts + RC=$? 
+fi +IFS=$OLDIFS exit $RC diff --git a/scripts/ost_config.in b/scripts/ost_config.in index afce131f195b2f17c0198ac7439402916b15f4fb..cda517095e9a64426b97e670bb3f130c44113860 100644 --- a/scripts/ost_config.in +++ b/scripts/ost_config.in @@ -76,15 +76,6 @@ fi set -o noglob -opts="" -for argument in "$@";do - if [ -n "$opts" ]; then - opts=$opts"#""$argument" - else - opts="$argument" - fi -done - # decide whether to start interactively or not # interactive mode can be forced by setting -i as a iplt option interactive="" @@ -95,6 +86,3 @@ else interactive="-i" fi fi - - -IFS="#" diff --git a/scripts/ost_startup.py.in b/scripts/ost_startup.py.in index 612b6485cf2515deffe6023f80e3ea4ea14e8ce5..d3cac43d8daa106a882f696bf8ee02db7ccb5c39 100644 --- a/scripts/ost_startup.py.in +++ b/scripts/ost_startup.py.in @@ -1,4 +1,4 @@ -import sys, os, platform +import sys, os, platform, glob import optparse def show_help(option, opt, value, parser): @@ -11,7 +11,22 @@ def interactive_flag(option, opt, value, parser): def stop(): sys.exit(0) -usage = 'usage: ost [ost options] [script to execute] [script parameters]' +usage = """ + + ost [ost options] [script to execute] [script parameters] + +or + ost [action name] [action options] + +""" + +action_path = os.path.abspath(os.environ.get("OST_EXEC_DIR", "")) + +usage += 'Following actions are available:\n' +for action in sorted(glob.glob(os.path.join(action_path, 'ost-*'))): + usage += " %s\n" % action[len(action_path)+5:] +usage += '\nEach action should respond to "--help".\n' + class OstOptionParser(optparse.OptionParser): def __init__(self, **kwargs): optparse.OptionParser.__init__(self, **kwargs) diff --git a/singularity/README.rst b/singularity/README.rst new file mode 100644 index 0000000000000000000000000000000000000000..f90cf47553a96a3bf9eaab68970fcf91f2b109ff --- /dev/null +++ b/singularity/README.rst @@ -0,0 +1,67 @@ +OST Singularity +=============== + +Building Singularity image +-------------------------- + +In order to build 
OST Singularity image: + +.. code-block:: bash + + cd <OST ROOT>/singularity + sudo singularity build ost.img Singularity.1.8.0 + +.. note:: + + Running singularity build command requires root permissions (sudo). + +One can choose any name for an image. For the purpose of this file we will assume +that the image name is ``ost.img``. + +Here we only keep the recipe for the most recent version of OpenStructure. To +build an image for a different version, you can either adapt the +``OPENSTRUCTURE_VERSION`` variable in the recipe or look in the git history for +an older recipe. + +Available apps +-------------- + +This container includes the following apps: + * **OST** - OpenStructure binary + * **IPython** - OST-powered iPython shell + * **Notebook** - A Jupyter notebook playground with OST and nglview + * **lDDT** - The Local Distance Difference Test + * **Molck** - Molecular checker + * **ChemdictTool** - Create or update a compound library + +To see the help for each individual app run: + +.. code-block:: bash + + singularity help --app <APP NAME> <PATH TO OST IMAGE> + +Eg.: + +.. code-block:: bash + + singularity help --app OST ost.img + + +Facilitating the usage +---------------------- + +For each of these apps it is useful to create an alias if they will be +frequently used. Eg. to create an alias for IPython app one can run: + +.. code-block:: + + alias ost_ipython="singularity run --app IPython <PATH TO OST IMAGE>" + +Then (in the same terminal window) to invoke IPython app one can just type: + +.. code-block:: + + ost_ipython + +To make the alias permanent put it into your ``.bashrc`` file or whatever file +you use to store the aliases. 
diff --git a/singularity/Singularity.1.8.0 b/singularity/Singularity.1.8.0 new file mode 100644 index 0000000000000000000000000000000000000000..5a1e2600f40eecf259152800311d4686f124bed0 --- /dev/null +++ b/singularity/Singularity.1.8.0 @@ -0,0 +1,433 @@ +BootStrap: docker +From: ubuntu:16.04 + +%post +############################################################################## +# POST +############################################################################## + +# CHANGE DASH TO BASH +rm /bin/sh +ln -sf /bin/bash /bin/sh + +# DEFINE SOME ENV VARS USED DURING THE IMAGE BUILD +########################## +export SRC_FOLDER="/usr/local/src" +export CPUS_FOR_MAKE=8 +export PYTHONPATH="/usr/local/lib64/python2.7/site-packages:${PYTHONPATH}" +# When changing OPENSTRUCTURE_VERSION make sure to change it also in the +# environment section of singularity recipe (this file). +export OPENSTRUCTURE_VERSION="1.8.0" +export OPENSTRUCTURE_SHARE="/usr/local/share/ost" +export MSMS_VERSION="2.6.1" +export OPENMM_VERSION="7.1.1" +export DSSP_VERSION="2.2.1" +export OPENMM_INCLUDE_PATH=/usr/local/openmm/include/ +export OPENMM_LIB_PATH=/usr/local/openmm/lib/ +export JUPYTER_CONFIG_DIR="/usr/local/etc/jupyter" +export JUPYTER_PATH="/usr/local/share/jupyter" +export JUPYTER_RUNTIME_DIR="$JUPYTER_PATH/runtime" +export VIRTUALENV_DIR="/usr/local/share/ost_venv" + + +# INSTALL SYSTEM DEPS +##################### +apt-get update -y && apt-get install -y cmake \ + sip-dev \ + libtiff-dev \ + libfftw3-dev \ + libeigen3-dev \ + libboost-all-dev \ + libpng-dev \ + python-all \ + python2.7 \ + python-qt4 \ + qt4-qtconfig \ + qt4-qmake \ + libqt4-dev \ + libpng-dev \ + wget \ + git \ + gfortran \ + python-pip \ + tar \ + libbz2-dev \ + doxygen \ + swig \ + clustalw \ + python-virtualenv \ + locales + +# SET LOCALE +############ +echo "LC_ALL=en_US.UTF-8" >> /etc/environment +echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen +echo "LANG=en_US.UTF-8" > /etc/locale.conf +locale-gen en_US.UTF-8 + +# 
INSTALL SOME PYTHON PACKAGES GLOBALY +###################################### +pip install --no-cache-dir numpy==1.10.4 \ + scipy==1.0.0 \ + pandas==0.22.0 + + +# SET UP VIRTUALENV +################### +virtualenv --system-site-packages $VIRTUALENV_DIR +. $VIRTUALENV_DIR/bin/activate + + +# INSTALL REQUIRED PYTHON PACKAGES +################################## +pip install jupyter==1.0.0 \ + nglview==1.1.6 + +# DOWNLOAD AND INSTALL MSMS +############## +cd ${SRC_FOLDER} +if [ ! -f msms_i86_64Linux2_${MSMS_VERSION}.tar.gz ]; then + mkdir -p msms + cd msms + wget http://mgltools.scripps.edu/downloads/tars/releases/MSMSRELEASE/REL${MSMS_VERSION}/msms_i86_64Linux2_${MSMS_VERSION}.tar.gz + tar -xvzf msms_i86_64Linux2_${MSMS_VERSION}.tar.gz + cp -v ${SRC_FOLDER}/msms/msms.x86_64Linux2.${MSMS_VERSION} /usr/local/bin/msms + cp -v ${SRC_FOLDER}/msms/pdb_to_xyzr /usr/local/bin/pdb_to_xyzr + cp -v ${SRC_FOLDER}/msms/pdb_to_xyzrn /usr/local/bin/pdb_to_xyzrn +fi + +# COMPILE OPENMM FROM SOURCES. INSTALL TO /usr/local +############################ +cd ${SRC_FOLDER} +if [ ! -f openmm-${OPENMM_VERSION}.tar.gz ]; then + wget -O openmm-${OPENMM_VERSION}.tar.gz -nc https://github.com/pandegroup/openmm/archive/${OPENMM_VERSION}.tar.gz + mkdir ${SRC_FOLDER}/openmm-${OPENMM_VERSION} + tar xf openmm-${OPENMM_VERSION}.tar.gz -C ${SRC_FOLDER}/openmm-${OPENMM_VERSION} --strip-components=1 + mkdir -p ${SRC_FOLDER}/openmm-${OPENMM_VERSION}/build && cd ${SRC_FOLDER}/openmm-${OPENMM_VERSION}/build + cmake .. && make -j $CPUS_FOR_MAKE && make install + cd ${SRC_FOLDER}/openmm-${OPENMM_VERSION}/build/python && python setup.py build && python setup.py install +fi + +# COMPILE AND INSTALL DSSP +############## +cd ${SRC_FOLDER} +if [ ! 
-f dssp-${DSSP_VERSION}.tgz ]; then + wget ftp://ftp.cmbi.umcn.nl/pub/molbio/software/dssp-2/dssp-${DSSP_VERSION}.tgz + tar -xvzf dssp-${DSSP_VERSION}.tgz + cd dssp-${DSSP_VERSION} + make -j ${CPUS_FOR_MAKE} + make install +fi + +# INSTALL OST +############# + +cd ${SRC_FOLDER} +if [ ! -f openstructure-${OPENSTRUCTURE_VERSION}.tar.gz ]; then + # copy ost release + wget -O openstructure-${OPENSTRUCTURE_VERSION}.tar.gz -nc https://git.scicore.unibas.ch/schwede/openstructure/repository/${OPENSTRUCTURE_VERSION}/archive.tar.gz + mkdir openstructure-${OPENSTRUCTURE_VERSION} + tar xf openstructure-${OPENSTRUCTURE_VERSION}.tar.gz -C ${SRC_FOLDER}/openstructure-${OPENSTRUCTURE_VERSION} --strip-components=1 + mkdir -p ${SRC_FOLDER}/openstructure-${OPENSTRUCTURE_VERSION}/build && cd ${SRC_FOLDER}/openstructure-${OPENSTRUCTURE_VERSION}/build + + # cmake ost + cmake .. -DPYTHON_LIBRARIES=/usr/lib/x86_64-linux-gnu/libpython2.7.so \ + -DOPTIMIZE=ON \ + -DENABLE_MM=ON \ + -DCOMPILE_TMTOOLS=1 \ + -DUSE_NUMPY=1 \ + -DOPEN_MM_LIBRARY=$OPENMM_LIB_PATH/libOpenMM.so \ + -DOPEN_MM_INCLUDE_DIR=$OPENMM_INCLUDE_PATH \ + -DOPEN_MM_PLUGIN_DIR=$OPENMM_LIB_PATH/plugins \ + -DENABLE_GFX=ON \ + -DENABLE_GUI=ON + + # Build chemdict_tool + make -j ${CPUS_FOR_MAKE} chemdict_tool + + # get the compound library + wget ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz + stage/bin/chemdict_tool create components.cif.gz compounds.chemlib pdb + stage/bin/chemdict_tool update modules/conop/data/charmm.cif compounds.chemlib charmm + mkdir -p $OPENSTRUCTURE_SHARE + chmod a+rw -R $OPENSTRUCTURE_SHARE + mv compounds.chemlib $OPENSTRUCTURE_SHARE + + # Build and install OST + cmake .. 
-DPYTHON_LIBRARIES=/usr/lib/x86_64-linux-gnu/libpython2.7.so \ + -DOPTIMIZE=ON \ + -DENABLE_MM=ON \ + -DCOMPILE_TMTOOLS=1 \ + -DUSE_NUMPY=1 \ + -DOPEN_MM_LIBRARY=$OPENMM_LIB_PATH/libOpenMM.so \ + -DOPEN_MM_INCLUDE_DIR=$OPENMM_INCLUDE_PATH \ + -DOPEN_MM_PLUGIN_DIR=$OPENMM_LIB_PATH/plugins \ + -DENABLE_GFX=ON \ + -DENABLE_GUI=ON \ + -DCOMPOUND_LIB=$OPENSTRUCTURE_SHARE/compounds.chemlib + + # Build chemdict_tool + make -j ${CPUS_FOR_MAKE} + make check + make install +fi + +# SETUP JUPYTER +############### +mkdir -p /usr/local/share/ipython +mkdir -p $JUPYTER_PATH +mkdir -p $JUPYTER_RUNTIME_DIR +mkdir -p $JUPYTER_CONFIG_DIR +mkdir -p $JUPYTER_PATH/kernels/ost-kernel +chmod a+rw -R /usr/local/share/ipython +chmod a+rw -R $JUPYTER_PATH +chmod a+rw -R $JUPYTER_CONFIG_DIR +chmod a+rw -R $JUPYTER_RUNTIME_DIR +cat > $JUPYTER_PATH/kernels/ost-kernel/kernel.json <<EOF +{ + "display_name": "OST", + "language": "python", + "argv": [ + "python", + "-m", "ipykernel", + "-f", "{connection_file}", + "--InteractiveShellApp.exec_PYTHONSTARTUP=False", + "--InteractiveShellApp.exec_files=['/usr/local/lib64/python2.7/site-packages/ost/ost_startup.py']" + ], + "env": { + } +} +EOF + +jupyter nbextension enable nglview --py --sys-prefix + +# GO HOME AND CLEANUP +##################### +apt-get purge -y cmake \ + wget \ + git \ + gfortran \ + python-pip \ + libbz2-dev \ + doxygen \ + swig +apt-get clean +apt-get autoremove -y + +cd $SRC_FOLDER && rm -rf $SRC_FOLDER/* + +cd /home + +%environment +############################################################################## +# ENVIRONMENT +############################################################################## +export OST_ROOT="/usr/local" +export OPENSTRUCTURE_VERSION="1.8.0" +export PYTHONPATH="/usr/local/lib64/python2.7/site-packages:${PYTHONPATH}" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib64" +export QT_X11_NO_MITSHM=1 +export IPYTHONDIR="/usr/local/share/ipython" +export JUPYTER_CONFIG_DIR="/usr/local/etc/jupyter" 
+export JUPYTER_PATH="/usr/local/share/jupyter" +export JUPYTER_RUNTIME_DIR="$JUPYTER_PATH/runtime" +export VIRTUALENV_DIR="/usr/local/share/ost_venv" + +%apprun ChemdictTool +############################################################################## +# CHEMDICT TOOL APP +############################################################################## +$OST_ROOT/bin/chemdict_tool "$@" + +%apprun lDDT +############################################################################## +# lDDT APP +############################################################################## +$OST_ROOT/bin/lddt "$@" + +%apphelp lDDT +The Local Distance Difference Test. + +Usage: + + singularity run --app lDDT <IMAGE> [options] <mod1> [mod1 [mod2]] <re1>[,ref2,ref3] + +Options: + -s selection performed on ref + -c use Calphas only + -f perform structural checks and filter input data + -t fault tolerant parsing + -p <file> use specified parmeter file. Mandatory + -v <level> verbosity level (0=results only,1=problems reported, 2=full report) + -b <value> tolerance in stddevs for bonds + -a <value> tolerance in stddevs for angles + -r <value> distance inclusion radius + -i <value> sequence separation + -e print version + -x ignore residue name consistency checks + +%apprun Molck +############################################################################## +# MOLCK APP +############################################################################## +$OST_ROOT/bin/molck "$@" + +%apphelp Molck +This is molck - the molecule checker + +Usage: + singularity run --app Molck <IMAGE> [options] file1.pdb [file2.pdb [...]] + +Options: + --complib=path location of the compound library file. If not provided, the + following locations are searched in this order: + 1. Working directory, 2. 
OpenStructure standard library location (if the + executable is part of a standard OpenStructure installation) + --rm=<a>,<b> remove atoms and residues matching some criteria + zeroocc - Remove atoms with zero occupancy + hyd - Remove hydrogen atoms + oxt - Remove terminal oxygens + nonstd - Remove all residues not one of the 20 standard amino acids + unk - Remove unknown and atoms not following the nomenclature + --fix-ele clean up element column + --stdout write cleaned file(s) to stdout + --out=filename write cleaned file(s) to disk. % characters in the filename are + replaced with the basename of the input file without extension. + Default: %-molcked.pdb + --color=auto|on|off + whether output should be colored + --map-nonstd maps modified residues back to the parent amino acid, for example + MSE -> MET, SEP -> SER. + +%apprun OST +############################################################################## +# OST APP +############################################################################## +$OST_ROOT/bin/ost "$@" + +%apphelp OST +The OST app exposes OpenStructure binary and can be used to run interactive shell +and scripts. + +Usage: + singularity run --app OST <IMAGE> [ost options] [script to execute] [script parameters] + +Options: + -i, --interactive start interpreter interactively (must be first + parameter, ignored otherwise) + -h, --help show this help message and exit + -v VLEVEL, --verbosity_level=VLEVEL + sets the verbosity level [default: 2] + +If script requires some external files eg. PDBs, they have to be located in the +path accessible via mounted volumes. By default Singularity mounts $HOME and +goes to CWD. Thus this should work as expected out of the box. 
+ + +%appenv IPython +############################################################################## +# IPYTHON ENV +############################################################################## +export DNG_ROOT=$OST_ROOT +export DNG_INITDIR=${DNG_ROOT}/lib64/python2.7/site-packages/ost + +%apprun IPython +############################################################################## +# OST IPYTHON APP +############################################################################## +. $VIRTUALENV_DIR/bin/activate && ipython -i $DNG_INITDIR/ost_startup.py "$@" + +%apphelp IPython +OST-powered iPython shell. + +Usage: + + singularity run --app IPython <IMAGE> [options] + +Detailed help: + + singularity run --app IPython <IMAGE> --help + +%appenv Notebook +############################################################################## +# NOTEBOOK ENV +############################################################################## +export BIN_DIR=$OST_ROOT/bin +export XDG_RUNTIME_DIR="" +. $OST_ROOT/libexec/openstructure/ost_config + +%apprun Notebook +############################################################################## +# NOTEBOOK APP +############################################################################## +. $VIRTUALENV_DIR/bin/activate && jupyter notebook --NotebookApp.iopub_data_rate_limit=10000000 --no-browser "$@" + +%apphelp Notebook +A Jupyter notebook playground with OST and nglview. + +Usage: + + singularity run --app Notebook <IMAGE> [options] + +The Jupyter notebook is run by default with `--NotebookApp.iopub_data_rate_limit=10000000` +and `--no-browser` options. + +Useful options when running on remote server: + --ip=<Unicode> (NotebookApp.ip) + Default: 'localhost' + The IP address the notebook server will listen on. + --port=<Integer> (NotebookApp.port) + Default: 8888 + The port the notebook server will listen on. + +Copy the URL to the browser and launch the notebook with OST kernel.
This will +load all necessary OST components just like in the OST shell. We also enabled +the nglview widget to interactively view molecular structures and trajectories. +For more details on how to use nglview see http://nglviewer.org/nglview/latest/. + +As Singularity mounts $HOME by default, Jupyter and IPython config files +are moved to separate directories. Proper environment variables are also set. +In addition, Jupyter is run in a separate virtualenv to not interact with possibly +installed host version. + +To list all available options: + + singularity run --app Notebook <IMAGE> --help + + +%runscript +############################################################################## +# RUNSCRIPT +############################################################################## +cat << EOF +Singularity container for OST $OPENSTRUCTURE_VERSION. + +This container includes the following apps: + * OST - OpenStructure binary + * IPython - OST-powered iPython shell + * Notebook - A Jupyter notebook playground with OST and nglview + * lDDT - The Local Distance Difference Test + * Molck - Molecular checker + * ChemdictTool - Create or update a compound library + +To see the help for each individual app run: + + singularity help --app <APP NAME> <IMAGE NAME> +EOF + +%help +Singularity container for OST. 
+ +This container includes the following apps: + * OST - OpenStructure binary + * IPython - OST-powered iPython shell + * Notebook - A Jupyter notebook playground with OST and nglview + * lDDT - The Local Distance Difference Test + * Molck - Molecular checker + * ChemdictTool - Create or update a compound library + +To see the help for each individual app run: + + singularity help --app <APP NAME> <IMAGE NAME> + + diff --git a/tools/molck/CMakeLists.txt b/tools/molck/CMakeLists.txt index 4e1dc2fb79b73e90022dd2f546a5011c98eca118..c76f1fd71b4532bf32b2cfcfa5dc34a0b7fb00ff 100644 --- a/tools/molck/CMakeLists.txt +++ b/tools/molck/CMakeLists.txt @@ -1,4 +1,4 @@ if (NOT WIN32) executable(NAME molck SOURCES main.cc - DEPENDS_ON ost_io STATIC) + DEPENDS_ON ost_io ost_mol_alg STATIC) endif(NOT WIN32) diff --git a/tools/molck/main.cc b/tools/molck/main.cc index 2a445620212e97165e53b7f9bf3592ecd5a21387..ef2d77c207213957a20d2d2d460da74fa0280b73 100644 --- a/tools/molck/main.cc +++ b/tools/molck/main.cc @@ -5,25 +5,52 @@ #include <ost/base.hh> #include <ost/boost_filesystem_helper.hh> #include <ost/platform.hh> -#include <ost/conop/model_check.hh> #include <ost/conop/conop.hh> -#include <ost/conop/amino_acids.hh> -#include <ost/io/mol/pdb_reader.hh> #include <ost/io/mol/pdb_writer.hh> +#include <ost/io/mol/pdb_reader.hh> #include <ost/io/mol/mmcif_reader.hh> #include <ost/io/io_exception.hh> -#include <ost/conop/nonstandard.hh> +#include <ost/mol/alg/molck.hh> #if defined(__APPLE__) #include <mach-o/dyld.h> #endif using namespace ost; -using namespace ost::conop; using namespace ost::mol; using namespace ost::io; namespace po=boost::program_options; namespace fs=boost::filesystem; +const char* USAGE= +"this is molck - the molecule checker\n" +"usage: molck [options] file1.pdb [file2.pdb [...]]\n" +"options \n" +" --complib=path location of the compound library file. If not provided, the \n" +" following locations are searched in this order: \n" +" 1. Working directory, 2. 
OpenStructure standard library location (if the \n" +" executable is part of a standard OpenStructure installation) \n" +" --rm=<a>,<b> remove atoms and residues matching some criteria \n" +" zeroocc - Remove atoms with zero occupancy \n" +" hyd - Remove hydrogen atoms \n" +" oxt - Remove terminal oxygens \n" +" nonstd - Remove all residues not one of the 20 standard amino acids \n" +" unk - Remove unknown and atoms not following the nomenclature\n" +" --fix-ele clean up element column\n" +" --stdout write cleaned file(s) to stdout \n" +" --out=filename write cleaned file(s) to disk. % characters in the filename are \n" +" replaced with the basename of the input file without extension. \n" +" Default: %-molcked.pdb \n" +" --color=auto|on|off \n" +" whether output should be colored\n" +" --map-nonstd maps modified residues back to the parent amino acid, for example\n" +" MSE -> MET, SEP -> SER.\n"; + +void usage() +{ + std::cerr << USAGE << std::endl; + exit(0); +} + EntityHandle load_x(const String& file, const IOProfile& profile) { try { @@ -53,20 +80,20 @@ EntityHandle load_x(const String& file, const IOProfile& profile) } // load compound library, exiting if it could not be found... 
-CompoundLibPtr load_compound_lib(const String& custom_path) +ost::conop::CompoundLibPtr load_compound_lib(const String& custom_path) { if (custom_path!="") { if (fs::exists(custom_path)) { - return CompoundLib::Load(custom_path); + return ost::conop::CompoundLib::Load(custom_path); } else { std::cerr << "Could not find compounds.chemlib at the provided location, trying other options" << std::endl; } } if (fs::exists("compounds.chemlib")) { - return CompoundLib::Load("compounds.chemlib"); + return ost::conop::CompoundLib::Load("compounds.chemlib"); } char result[ 1024 ]; - CompoundLibPtr lib; + ost::conop::CompoundLibPtr lib; String exe_path; #if defined(__APPLE__) uint32_t size=1023; @@ -89,45 +116,16 @@ CompoundLibPtr load_compound_lib(const String& custom_path) String share_path_string=BFPathToString(share_path); if (fs::exists(share_path_string)) { - return CompoundLib::Load(share_path_string); + return ost::conop::CompoundLib::Load(share_path_string); } } if (!lib) { std::cerr << "Could not load compounds.chemlib" << std::endl; exit(-1); } - return CompoundLibPtr(); + return ost::conop::CompoundLibPtr(); } -const char* USAGE= -"this is molck - the molecule checker\n" -"usage: molck [options] file1.pdb [file2.pdb [...]]\n" -"options \n" -" --complib=path location of the compound library file. If not provided, the \n" -" following locations are searched in this order: \n" -" 1. Working directory, 2. 
OpenStructure standard library location (if the \n" -" executable is part of a standard OpenStructure installation) \n" -" --rm=<a>,<b> remove atoms and residues matching some criteria \n" -" zeroocc - Remove atoms with zero occupancy \n" -" hyd - Remove hydrogen atoms \n" -" oxt - Remove terminal oxygens \n" -" nonstd - Remove all residues not one of the 20 standard amino acids \n" -" unk - Remove unknown and atoms not following the nomenclature\n" -" --fix-ele clean up element column\n" -" --stdout write cleaned file(s) to stdout \n" -" --out=filename write cleaned file(s) to disk. % characters in the filename are \n" -" replaced with the basename of the input file without extension. \n" -" Default: %-molcked.pdb \n" -" --color=auto|on|off \n" -" whether output should be colored\n" -" --map-nonstd maps modified residues back to the parent amino acid, for example\n" -" MSE -> MET, SEP -> SER.\n"; - -void usage() -{ - std::cerr << USAGE << std::endl; - exit(0); -} int main(int argc, char *argv[]) { @@ -136,19 +134,12 @@ int main(int argc, char *argv[]) } IOProfile prof; prof.fault_tolerant=true; + ost::mol::alg::MolckSettings settings; String rm; String color; - bool colored = false; - bool rm_unk_atoms=false; - bool rm_hyd_atoms=false; - bool rm_non_std=false; - bool rm_oxt_atoms=false; - bool rm_zero_occ_atoms=false; bool write_to_stdout = false; bool write_to_file = false; - bool map_nonstd_res = false; - bool assign_elem = false; String output_blueprint_string; String custom_path=""; @@ -196,24 +187,24 @@ int main(int argc, char *argv[]) output_blueprint_string = vm["out"].as<String>(); } if (vm.count("map-nonstd")) { - map_nonstd_res = true; + settings.map_nonstd_res = true; } if (vm.count("fix-ele")) { - assign_elem = true; + settings.assign_elem = true; } std::vector<StringRef> rms=StringRef(rm.c_str(), rm.size()).split(','); for (size_t i=0; i<rms.size(); ++i) { if (rms[i] == StringRef("unk", 3)) { - rm_unk_atoms = true; + settings.rm_unk_atoms = true; } 
else if (rms[i] == StringRef("nonstd", 6)) { - rm_non_std = true; + settings.rm_non_std = true; } else if (rms[i] == StringRef("hyd", 3)) { - rm_hyd_atoms = true; + settings.rm_hyd_atoms = true; } else if (rms[i] == StringRef("oxt", 3)) { - rm_oxt_atoms = true; + settings.rm_oxt_atoms = true; } else if (rms[i] == StringRef("zeroocc", 7)) { - rm_zero_occ_atoms = true; + settings.rm_zero_occ_atoms = true; } else { std::cerr << "unknown value to remove '" << rms[i] << "'" << std::endl; usage(); @@ -221,144 +212,23 @@ int main(int argc, char *argv[]) } } if (color=="auto") { - colored = isatty(STDERR_FILENO); + settings.colored = isatty(STDERR_FILENO); } else if (color == "on" || color == "1" || color == "yes") { - colored = true; + settings.colored = true; } else if (color == "off" || color == "0" || color == "no") { - colored = false; + settings.colored = false; } else { usage(); exit(-1); } - CompoundLibPtr lib=load_compound_lib(custom_path); + ost::conop::CompoundLibPtr lib=load_compound_lib(custom_path); for (unsigned int i = 0; i < files.size(); ++i) { EntityHandle ent=load_x(files[i], prof); if (!ent.IsValid()) { continue; } - if (map_nonstd_res) { - EntityHandle new_ent=CreateEntity(); - ChainHandleList chains=ent.GetChainList(); - XCSEditor new_edi=new_ent.EditXCS(); - for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { - ChainHandle new_chain = new_edi.InsertChain(c->GetName()); - ResidueHandleList residues = c->GetResidueList(); - for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { - AminoAcid aa = ResidueToAminoAcid(*r); - if (aa!=XXX) { - ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); - AtomHandleList atoms = r->GetAtomList(); - for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { - new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); - } - continue; - } else { - CompoundPtr 
compound=lib->FindCompound(r->GetName(),Compound::PDB); - if (!compound || !compound->IsPeptideLinking() || compound->GetChemClass()==ChemClass::D_PEPTIDE_LINKING || - OneLetterCodeToAminoAcid(compound->GetOneLetterCode())==XXX) { - ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); - AtomHandleList atoms = r->GetAtomList(); - for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { - new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); - } - continue; - } - ResidueHandle dest_res = new_edi.AppendResidue(new_chain,OneLetterCodeToResidueName(compound->GetOneLetterCode()),r->GetNumber()); - CopyResidue(*r,dest_res,new_edi,lib); - } - } - } - ent=new_ent; - } - - XCSEditor edi=ent.EditXCS(); - Diagnostics diags; - Checker checker(lib, ent, diags); - if (rm_zero_occ_atoms) { - std::cerr << "removing atoms with zero occupancy" << std::endl; - int zremoved=0; - AtomHandleList zero_atoms=checker.GetZeroOccupancy(); - for (AtomHandleList::const_iterator i=zero_atoms.begin(), e=zero_atoms.end(); i!=e; ++i) { - edi.DeleteAtom(*i); - zremoved++; - } - std::cerr << " --> removed " << zremoved << " hydrogen atoms" << std::endl; - } - - if (rm_hyd_atoms) { - std::cerr << "removing hydrogen atoms" << std::endl; - int hremoved=0; - AtomHandleList hyd_atoms=checker.GetHydrogens(); - for (AtomHandleList::const_iterator i=hyd_atoms.begin(), e=hyd_atoms.end(); i!=e; ++i) { - edi.DeleteAtom(*i); - hremoved++; - } - std::cerr << " --> removed " << hremoved << " hydrogen atoms" << std::endl; - } - if (rm_oxt_atoms) { - std::cerr << "removing OXT atoms" << std::endl; - int oremoved=0; - AtomHandleList atoms=ent.GetAtomList(); - for (AtomHandleList::const_iterator i=atoms.begin(), e=atoms.end(); i!=e; ++i) { - if (i->GetName()=="OXT") { - edi.DeleteAtom(*i); - oremoved++; - } - } - std::cerr << " --> removed " << oremoved << " OXT atoms" << std::endl; - } - - 
checker.CheckForCompleteness(); - checker.CheckForUnknownAtoms(); - checker.CheckForNonStandard(); - for (Diagnostics::const_diag_iterator - j = diags.diags_begin(), e = diags.diags_end(); j != e; ++j) { - const Diag* diag=*j; - std::cerr << diag->Format(colored); - switch (diag->GetType()) { - case DIAG_UNK_ATOM: - if (rm_unk_atoms) { - edi.DeleteAtom(diag->GetAtom(0)); - std::cerr << " --> removed "; - } - break; - case DIAG_NONSTD_RESIDUE: - if (rm_non_std) { - edi.DeleteResidue(diag->GetResidue(0)); - std::cerr << " --> removed "; - } - break; - default: - break; - } - std::cerr << std::endl; - } - - if (assign_elem) { - ChainHandleList chains=ent.GetChainList(); - for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { - ResidueHandleList residues = c->GetResidueList(); - for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { - CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); - AtomHandleList atoms=r->GetAtomList(); - if (!compound) { - for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { - j->SetElement(""); - } - continue; - } - for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { - int specindx=compound->GetAtomSpecIndex(j->GetName()); - if (specindx!=-1) { - j->SetElement(compound->GetAtomSpecs()[specindx].element); - } else { - j->SetElement(""); - } - } - } - } - } + ost::mol::alg::Molck(ent, lib, settings); if (write_to_stdout) { PDBWriter writer(std::cout, prof);