diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 6a376fb4aac256f63be9cd1c51b84ccf8baf9e7d..03c3bf44dcf3ee27908c250ba0827d223a28d5d3 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -5,6 +5,8 @@ Changes in Release 1.x.x * Port to Qt5 * C++ wrapper around C++ implementation of TMalign * Database for efficient in-memory lookup of coordinates and sequences + * Updated CAD score binding with change in result format, check documentation + for further details Changes in Release 1.9.0 -------------------------------------------------------------------------------- diff --git a/doc/conf/conf.py b/doc/conf/conf.py index 36e11cd090ada1751759cecc24de288e41a29ebe..7600179f94620b1470c6d8452d913d7b597d8073 100644 --- a/doc/conf/conf.py +++ b/doc/conf/conf.py @@ -32,7 +32,7 @@ import ost # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', - 'sphinx.ext.coverage', 'sphinx.ext.mathjax', + 'sphinx.ext.coverage', 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig'] # Add any paths that contain templates here, relative to this directory. diff --git a/modules/base/pymod/table.py b/modules/base/pymod/table.py index c93a30797ac38c3f0990218caf19db0039cc8b77..994f01000658969c6cffe5291ffe9d67a374a16b 100644 --- a/modules/base/pymod/table.py +++ b/modules/base/pymod/table.py @@ -2394,46 +2394,49 @@ Statistics for column %(col)s def GetOptimalPrefactors(self, ref_col, *args, **kwargs): ''' - This returns the optimal prefactor values (i.e. a, b, c, ...) for the - following equation + This returns the optimal prefactor values (i.e. :math:`a, b, c, ...`) for + the following equation .. math:: :label: op1 - + a*u + b*v + c*w + ... = z - where u, v, w and z are vectors. In matrix notation + where :math:`u, v, w` and :math:`z` are vectors. In matrix notation .. math:: :label: op2 - + A*p = z - where A contains the data from the table (u,v,w,...), p are the prefactors - to optimize (a,b,c,...) and z is the vector containing the result of - equation :eq:`op1`. + where :math:`A` contains the data from the table :math:`(u,v,w,...)`, + :math:`p` are the prefactors to optimize :math:`(a,b,c,...)` and :math:`z` + is the vector containing the result of equation :eq:`op1`. - The parameter ref_col equals to z in both equations, and \*args are columns - u, v and w (or A in :eq:`op2`). All columns must be specified by their names. + The parameter ref_col equals to :math:`z` in both equations, and \*args + are columns :math:`u`, :math:`v` and :math:`w` (or :math:`A` in :eq:`op2`). + All columns must be specified by their names. **Example:** - + .. code-block:: python tab.GetOptimalPrefactors('colC', 'colA', 'colB') - The function returns a list of containing the prefactors a, b, c, ... in - the correct order (i.e. same as columns were specified in \*args). + The function returns a list containing the prefactors + :math:`a, b, c, ...` in the correct order (i.e. same as columns were + specified in \*args). Weighting: If the kwarg weights="columX" is specified, the equations are weighted by - the values in that column. Each row is multiplied by the weight in that row, - which leads to :eq:`op3`: + the values in that column. Each row is multiplied by the weight in that + row, which leads to :eq:`op3`: .. math:: :label: op3 - - weight*a*u + weight*b*v + weight*c*w + ... = weight*z + + \\textit{weight}*a*u + \\textit{weight}*b*v + \\textit{weight}*c*w + ... + = \\textit{weight}*z Weights must be float or int and can have any value. A value of 0 ignores this equation, a value of 1 means the same as no weight. If all weights are diff --git a/modules/bindings/pymod/cadscore.py b/modules/bindings/pymod/cadscore.py index fdaef13a955422b98189fe11fdcc1908e88f40e0..8b81c79a9bdfcd11e7d7e7edb1cabb083758bf76 100644 --- a/modules/bindings/pymod/cadscore.py +++ b/modules/bindings/pymod/cadscore.py @@ -28,8 +28,8 @@ Proteins. 2012 Aug 30. [Epub ahead of print] Authors: Valerio Mariani, Alessandro Barbato """ -import subprocess, os, tempfile, platform -from ost import settings, io +import subprocess, os, tempfile, platform, re +from ost import settings, io, mol def _SetupFiles(model,reference): # create temporary directory @@ -37,7 +37,8 @@ def _SetupFiles(model,reference): dia = 'PDB' for chain in model.chains: if chain.name==" ": - raise RuntimeError("One of the chains in the model has no name. Cannot calculate CAD score") + raise RuntimeError("One of the chains in the model has no name. Cannot " + "calculate CAD score") if len(chain.name) > 1: dia = 'CHARMM' break; @@ -49,7 +50,8 @@ def _SetupFiles(model,reference): dia = 'PDB' for chain in reference.chains: if chain.name==" ": - raise RuntimeError("One of the chains in the reference has no name. Cannot calculate CAD score") + raise RuntimeError("One of the chains in the reference has no name. " + "Cannot calculate CAD score") if len(chain.name) > 1: dia = 'CHARMM' break; @@ -64,7 +66,6 @@ def _CleanupFiles(dir_name): import shutil shutil.rmtree(dir_name) - class CADResult: """ Holds the result of running CAD @@ -77,78 +78,244 @@ class CADResult: Dictionary containing local CAD's atom-atom (AA) scores. - :type: dictionary (key: chain.resnum (e.g.: A.24), value: CAD local AA score (see CAD Documentation online) + :type: dictionary (key: tuple(chain, resnum) (e.g.: + ("A", ost.mol.ResNum(24)), value: CAD local AA score + (see CAD Documentation online) """ def __init__(self, globalAA, localAA): self.globalAA=globalAA self.localAA=localAA def _ParseCADGlobal(lines): - interesting_lines=lines[1] - aa=float(interesting_lines.split()[10]) - return aa + header = lines[0].split() + aa_idx = header.index("AA") + aa_score=float(lines[1].split()[aa_idx]) + return aa_score def _ParseCADLocal(lines): + local_scores_idx = None + for line_idx in range(len(lines)): + if "local_scores" in lines[line_idx]: + local_scores_idx = line_idx + break + if local_scores_idx == None: + raise RuntimeError("Failed to parse local cadscores") + local_aa_dict={} + for line_idx in range(local_scores_idx+2, len(lines)): + items=lines[line_idx].split() + local_aa = float(items[2]) + if local_aa < 0.0: + continue # invalid CAD score + key = (items[0], mol.ResNum(int(items[1]))) + local_aa_dict[key] = local_aa + + return local_aa_dict + +def _ParseVoronotaGlobal(lines): + return float(lines[0].split()[4]) + +def _ParseVoronotaLocal(lines): local_aa_dict={} - for lin in lines[11:]: - items=lin.split() - chain=items[0] - resnum=int(items[1]) - key=chain+'.'+str(resnum) - aa=float(items[2]) - local_aa_dict[key]=aa + chain_name_regex = r'c\<\D+\>' + resnum_regex = r'r\<\d+\>' + insertion_code_regex = r'i\<\D\>' + for line in lines: + local_aa = float(line.split()[-1]) + if local_aa < 0.0: + continue # invalid CAD score + chain_data = re.findall(chain_name_regex, line) + resnum_data = re.findall(resnum_regex, line) + insertion_code_data = re.findall(insertion_code_regex, line) + resnum = None + if len(insertion_code_data) == 0: + resnum = mol.ResNum(int(resnum_data[0][1:].strip('><'))) + else: + resnum = mol.ResNum(int(resnum_data[0][1:].strip('><')), + insertion_code_data[0][1:].strip('><')) + key = (chain_data[0][1:].strip('><'), resnum) + local_aa_dict[key] = local_aa return local_aa_dict -def _RunCAD(max_iter, tmp_dir): +def _RunCAD(tmp_dir, mode, cad_bin_path, old_regime): + model_filename=os.path.join(tmp_dir, 'model.pdb') reference_filename=os.path.join(tmp_dir, 'reference.pdb') + globalAA = None + localAA = None + if platform.system() == "Windows": raise RuntimeError('CAD score not available on Windows') + + if mode == "classic": + cad_calc_path = None + cad_read_g_path = None + cad_read_l_path = None + if cad_bin_path: + cad_calc_path = settings.Locate('CADscore_calc.bash', + search_paths=[cad_bin_path], + search_system_paths=False) + cad_read_g_path = settings.Locate('CADscore_read_global_scores.bash', + search_paths=[cad_bin_path], + search_system_paths=False) + cad_read_l_path=settings.Locate('CADscore_read_local_scores.bash', + search_paths=[cad_bin_path], + search_system_paths=False) + # also try to locate the actual executable that is called from the + # bash scripts + executable_path = settings.Locate('voroprot2', + search_paths=[cad_bin_path], + search_system_paths=False) + else: + cad_calc_path = settings.Locate('CADscore_calc.bash') + cad_read_g_path = settings.Locate('CADscore_read_global_scores.bash') + cad_read_l_path = settings.Locate('CADscore_read_local_scores.bash') + # also try to locate the actual executable that is called from the + # bash scripts + executable_path = settings.Locate('voroprot2') + command1="\"%s\" -m \"%s\" -t \"%s\" -D \"%s\"" %(cad_calc_path, + model_filename, + reference_filename, + os.path.join(tmp_dir, + "cadtemp")) + command2="\"%s\" -D \"%s\"" %(cad_read_g_path, os.path.join(tmp_dir, + "cadtemp")) + command3="\"%s\" -m \"%s\" -t \"%s\" -D \"%s\" -c AA" %(cad_read_l_path, + model_filename, + reference_filename, + os.path.join(tmp_dir, + "cadtemp")) + + ps1=subprocess.Popen(command1, shell=True, stdout=subprocess.PIPE) + ps1.wait() + ps2=subprocess.Popen(command2, shell=True, stdout=subprocess.PIPE) + ps2.wait() + lines=ps2.stdout.readlines() + try: + globalAA=_ParseCADGlobal(lines) + except: + raise RuntimeError("CAD calculation failed") + ps3=subprocess.Popen(command3, shell=True, stdout=subprocess.PIPE) + ps3.wait() + lines=ps3.stdout.readlines() + try: + localAA=_ParseCADLocal(lines) + except: + raise RuntimeError("CAD calculation failed") + + elif mode == "voronota": + local_score_filename = os.path.join(tmp_dir, "local_scores.txt") + voronota_cadscore_path = None + if cad_bin_path: + voronota_cadscore_path = settings.Locate("voronota-cadscore", + search_paths=[cad_bin_path], + search_system_paths=False) + # also try to locate the actual executable that is called from the + # bash script + executable_path = settings.Locate("voronota", + search_paths=[cad_bin_path], + search_system_paths=False) + else: + voronota_cadscore_path = settings.Locate("voronota-cadscore") + # also try to locate the actual executable that is called from the + # bash script + executable_path = settings.Locate("voronota") + cmd = [voronota_cadscore_path, '-m', model_filename, '-t', + reference_filename, '--contacts-query-by-code AA', + '--output-residue-scores', local_score_filename] + if old_regime: + cmd.append("--old-regime") + cmd = ' '.join(cmd) + ps = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) + ps.wait() + try: + globalAA = _ParseVoronotaGlobal(ps.stdout.readlines()) + except: + raise RuntimeError("CAD calculation failed") + try: + localAA = _ParseVoronotaLocal(open(local_score_filename).readlines()) + except: + raise RuntimeError("CAD calculation failed") + else: - cad_calc_path=settings.Locate('CADscore_calc.bash') - cad_read_g_path=settings.Locate('CADscore_read_global_scores.bash') - cad_read_l_path=settings.Locate('CADscore_read_local_scores.bash') - command1="\"%s\" -o %i -m \"%s\" -t \"%s\" -D \"%s\"" %(cad_calc_path, max_iter, model_filename, reference_filename, os.path.join(tmp_dir,"cadtemp")) - command2="\"%s\" -D \"%s\"" %(cad_read_g_path, os.path.join(tmp_dir,"cadtemp")) - command3="\"%s\" -m \"%s\" -t \"%s\" -D \"%s\" -c AA" %(cad_read_l_path, model_filename, reference_filename,os.path.join(tmp_dir,"cadtemp")) - ps1=subprocess.Popen(command1, shell=True, stdout=subprocess.PIPE) - ps1.wait() - ps2=subprocess.Popen(command2, shell=True, stdout=subprocess.PIPE) - ps2.wait() - lines=ps2.stdout.readlines() - try: - globalAA=_ParseCADGlobal(lines) - except: - raise RuntimeError("CAD calculation failed") - ps3=subprocess.Popen(command3, shell=True, stdout=subprocess.PIPE) - ps3.wait() - lines=ps3.stdout.readlines() - try: - localAA=_ParseCADLocal(lines) - except: - raise RuntimeError("CAD calculation failed") + raise RuntimeError("Invalid CAD mode! Allowed are: " + "[\"classic\", \"voronota\"]") + return CADResult(globalAA,localAA) +def _HasInsertionCodes(model, reference): + for r in model.residues: + if r.GetNumber().GetInsCode() != "\0": + print r + return True + for r in reference.residues: + if r.GetNumber().GetInsCode() != "\0": + print r + return True + return False -def CADScore(model, reference, max_iter=300): +def _MapLabels(model, cad_results, label): + for k,v in cad_results.localAA.iteritems(): + r = model.FindResidue(k[0], k[1]) + if not r.IsValid(): + raise RuntimeError("Failed to map cadscore on residues: " + + "CAD score estimated for residue in chain \"" + + k[0] + "\" with ResNum " + str(k[1]) + ". Residue " + + "could not be found in model.") + r.SetFloatProp(label, v) + +def CADScore(model, reference, mode = "classic", label = "localcad", + old_regime = False, cad_bin_path = None): """ - Calculates global and local atom-atom (AA) CAD Scores - + Calculates global and local atom-atom (AA) CAD Scores. + + You can either access the original implementation available from + https://bitbucket.org/kliment/cadscore/downloads/ + or the new implementation which is part of the Voronota package + available from https://bitbucket.org/kliment/voronota/downloads/. + + The scores of the two implementations differ but strongly correlate + as the contacts between atoms are estimated differently. When using + the "voronota" *mode* you can minimize those discrepancies by + setting the *old_regime* flag to True. + + Furthermore, the "voronota" *mode* generates per-residue scores that + are inverted when compared to the classical implementation + (0.0: bad, 1.0 good). :param model: The model structure. :type model: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle` :param reference: The reference structure - :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle` - :param max_iter: Optional. The maximum number of iteration for the tessellation algorithm before giving up. By default 300 + :type reference: :class:`~ost.mol.EntityView` or + :class:`~ost.mol.EntityHandle` + :param mode: What CAD score implementation to use, must be one in + ["classic", "voronota"] + :param label: Local CAD scores will be mapped on residues of model as + float property with this name + :type label: :class:`str` + :param old_regime: Only has an effect if *mode* is "voronota". If set to true, + the discrepancies between the two modes is minimized but + the behaviour of inverted scores persists. + :type old_regime: :class:`bool` + :param cad_bin_path: Path to search for the required executables + (["CADscore_calc.bash", + "CADscore_read_global_scores.bash", + "CADscore_read_local_scores.bash"] for "classic" *mode* + or ["voronota-cadscore"] for "voronota" *mode*). If not + set, the env path is searched. + :type cad_bin_path: :class:`str` :returns: The result of the CAD score calculation :rtype: :class:`CADResult` - :raises: :class:`~ost.settings.FileNotFound` if any of the CAD score exacutables could not be located. + :raises: :class:`~ost.settings.FileNotFound` if any of the CAD score + executables could not be located. :raises: :class:`RuntimeError` if the calculation failed """ + if mode == "classic" and _HasInsertionCodes(model, reference): + raise RuntimeError("The classic CAD score implementation does not support " + "insertion codes in residues") tmp_dir_name=_SetupFiles(model, reference) - result=_RunCAD(max_iter, tmp_dir_name) + result=_RunCAD(tmp_dir_name, mode, cad_bin_path, old_regime) _CleanupFiles(tmp_dir_name) + _MapLabels(model, result, label) return result - diff --git a/modules/io/doc/io.rst b/modules/io/doc/io.rst index 0b5bbc7bba0c31a0e1b06e1ab6dc7902404e8684..35e71011fc05d1acbf7831df4620a6c15d321b38 100644 --- a/modules/io/doc/io.rst +++ b/modules/io/doc/io.rst @@ -144,12 +144,14 @@ file: .. function:: EntityToPDBStr(ent, profile=IOProfile()) - Return entity as a string in PDB format. + Return entity as a string in PDB format. :param entity: The :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView` :param profile: The IO Profile to read the entity with. For more information on the IO Profiles available, see :doc:`profile`. + :raises: IOException if the restrictions of the PDB format are not satisfied + (see :meth:`ost.io.SavePDB`) :rtype: string. diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index 302b8fb4f86949de8ab7f5ec63b7e2ee9bb58ae7..70a196663eb5c0ef6b7acd9396cb4f1ff610de4b 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -184,6 +184,15 @@ def SavePDB(models, filename, dialect=None, pqr=False, profile='DEFAULT'): :param models: The entity or list of entities (handles or views) to be saved :param filename: The filename :type filename: string + :raises: IOException if the restrictions of the PDB format are not satisfied + (with the exception of atom numbers, see above): + + * Chain names with more than one character + * Atom positions with coordinates outside range [-999.99, 9999.99] + * Residue names longer than three characters + * Atom names longer than four characters + * Numeric part of :class:`ost.mol.ResNum` outside range [-999, 9999] + * Alternative atom indicators longer than one character """ if not getattr(models, '__len__', None): models=[models] diff --git a/modules/io/src/mol/pdb_writer.cc b/modules/io/src/mol/pdb_writer.cc index 25faae932e3bcb292a26b56ba8628c59a68990b3..181c2ad8ad618941e775118269ee92661e53d74b 100644 --- a/modules/io/src/mol/pdb_writer.cc +++ b/modules/io/src/mol/pdb_writer.cc @@ -141,7 +141,14 @@ void write_atom(std::ostream& ostr, FormattedLine& line, } } } - line(22, 4)=fmt::LPaddedInt(res.GetNumber().GetNum()); + + int num = res.GetNumber().GetNum(); + if(num < -999 || num > 9999) { + throw IOException("Residue number from " + res.GetQualifiedName() + + " is out of range supported by the PDB format " + "(-999 to 9999)"); + } + line(22, 4)=fmt::LPaddedInt(num); if (ins_code!=0) { line[26]=ins_code; } @@ -415,7 +422,8 @@ void PDBWriter::WriteModelLeader() if (multi_model_) { out_ << "MODEL " << mol_count_ << std::endl; } else if (mol_count_>1) { - throw IOException("Trying to write several models into one file with "); + throw IOException("Trying to write several models into one file without " + "multi model mode enabled!"); } } diff --git a/modules/io/tests/test_io_pdb.cc b/modules/io/tests/test_io_pdb.cc index ff4d3a8dbdfb70988274a0611802cc7413bded60..ffc6dd286154a33680f950b6c6c47667f9581601 100644 --- a/modules/io/tests/test_io_pdb.cc +++ b/modules/io/tests/test_io_pdb.cc @@ -781,6 +781,56 @@ BOOST_AUTO_TEST_CASE(res_name_too_long) BOOST_CHECK_THROW(writer.Write(ent), IOException); } +BOOST_AUTO_TEST_CASE(res_num_too_low) +{ + std::stringstream out; + PDBWriter writer(out, IOProfile()); + + mol::EntityHandle ent=mol::CreateEntity(); + mol::XCSEditor edi=ent.EditXCS(); + mol::ChainHandle ch = edi.InsertChain("A"); + mol::ResidueHandle r = edi.AppendResidue(ch, "ARG"); + edi.InsertAtom(r,"N", geom::Vec3(26.861, 50.841, 38.803), "N"); + edi.InsertAtom(r,"CA", geom::Vec3(27.437, 49.969, 37.786), "C"); + edi.InsertAtom(r,"C", geom::Vec3(26.336, 48.959, 37.429), "C"); + edi.InsertAtom(r,"O", geom::Vec3(25.745, 48.313, 38.312), "O"); + edi.InsertAtom(r,"CB", geom::Vec3(28.653, 49.266, 38.349), "C"); + edi.InsertAtom(r,"CG", geom::Vec3(29.870, 50.188, 38.416), "C"); + edi.InsertAtom(r,"CD", geom::Vec3(31.033, 49.532, 39.173), "C"); + edi.InsertAtom(r,"NE", geom::Vec3(32.318, 50.244, 39.125), "N"); + edi.InsertAtom(r,"CZ", geom::Vec3(33.462, 49.750, 39.679), "C"); + edi.InsertAtom(r,"NH1", geom::Vec3(33.522, 48.572, 40.308), "N"); + edi.InsertAtom(r,"NH2", geom::Vec3(34.610, 50.427, 39.597), "N"); + + edi.SetResidueNumber(r, mol::ResNum(-1000)); + BOOST_CHECK_THROW(writer.Write(ent), IOException); +} + +BOOST_AUTO_TEST_CASE(res_num_too_high) +{ + std::stringstream out; + PDBWriter writer(out, IOProfile()); + + mol::EntityHandle ent=mol::CreateEntity(); + mol::XCSEditor edi=ent.EditXCS(); + mol::ChainHandle ch = edi.InsertChain("A"); + mol::ResidueHandle r = edi.AppendResidue(ch, "ARG"); + edi.InsertAtom(r,"N", geom::Vec3(26.861, 50.841, 38.803), "N"); + edi.InsertAtom(r,"CA", geom::Vec3(27.437, 49.969, 37.786), "C"); + edi.InsertAtom(r,"C", geom::Vec3(26.336, 48.959, 37.429), "C"); + edi.InsertAtom(r,"O", geom::Vec3(25.745, 48.313, 38.312), "O"); + edi.InsertAtom(r,"CB", geom::Vec3(28.653, 49.266, 38.349), "C"); + edi.InsertAtom(r,"CG", geom::Vec3(29.870, 50.188, 38.416), "C"); + edi.InsertAtom(r,"CD", geom::Vec3(31.033, 49.532, 39.173), "C"); + edi.InsertAtom(r,"NE", geom::Vec3(32.318, 50.244, 39.125), "N"); + edi.InsertAtom(r,"CZ", geom::Vec3(33.462, 49.750, 39.679), "C"); + edi.InsertAtom(r,"NH1", geom::Vec3(33.522, 48.572, 40.308), "N"); + edi.InsertAtom(r,"NH2", geom::Vec3(34.610, 50.427, 39.597), "N"); + + edi.SetResidueNumber(r, mol::ResNum(10000)); + BOOST_CHECK_THROW(writer.Write(ent), IOException); +} + BOOST_AUTO_TEST_CASE(res_name_mismatch_alt_loc) { diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst index 51de4cca63a8b7f794d4a671ca5cff1b779d7cdb..300852d954f6869c02ed7d2410ba9d3f8f958bf7 100644 --- a/modules/mol/alg/doc/molalg.rst +++ b/modules/mol/alg/doc/molalg.rst @@ -1877,7 +1877,7 @@ API The API here is set such that the functions modify the passed structure *ent* in-place. If this is not ok, please work on a copy of the structure. -.. function:: Molck(ent, lib, settings) +.. function:: Molck(ent, lib, settings, [prune=True]) Runs Molck on provided entity. @@ -1887,6 +1887,10 @@ API :type lib: :class:`~ost.conop.CompoundLib` :param settings: Molck settings :type settings: :class:`MolckSettings` + :param prune: Whether to remove residues/chains that don't contain atoms + anymore after Molck cleanup + :type prune: :class:`bool` + .. function:: MapNonStandardResidues(ent, lib) diff --git a/modules/mol/alg/pymod/export_molck.cc b/modules/mol/alg/pymod/export_molck.cc index e5e27dac593ebaf321c1b2fd5558ff53f209f543..c626a6ce306f96ed9cbbcb24fe8b05aa4385b9ce 100644 --- a/modules/mol/alg/pymod/export_molck.cc +++ b/modules/mol/alg/pymod/export_molck.cc @@ -133,5 +133,5 @@ void export_Molck() def("CleanUpElementColumn", &CleanUpElementColumn, (arg("ent"), arg("lib"))); - def("Molck", &Molck, (arg("ent"), arg("lib"), arg("settings"))); + def("Molck", &Molck, (arg("ent"), arg("lib"), arg("settings"), arg("prune")=true)); } diff --git a/modules/mol/alg/src/accessibility.cc b/modules/mol/alg/src/accessibility.cc index be95bfeb03421916779f12af120dab8af8841faa..e84815b964e68b888230a432fc76f45dda2cfaac 100644 --- a/modules/mol/alg/src/accessibility.cc +++ b/modules/mol/alg/src/accessibility.cc @@ -50,6 +50,16 @@ struct CubeGrid { delete [] cubes; } + static long int NumCubes(int x_c, int y_c, int z_c) { + int x_cubes = std::max(1, x_c); + int y_cubes = std::max(1, y_c); + int z_cubes = std::max(1, z_c); + long int n = x_cubes; + n *= y_cubes; + n *= z_cubes; + return n; + } + int GetCubeIdx(Real x, Real y, Real z) { int x_cube = std::min(static_cast<int>((x - x_min) / cube_edge_length), x_cubes - 1); @@ -560,10 +570,23 @@ CubeGrid SetupCubeGrid(const std::vector<Real>& x, Real x_range = x_max - x_min; Real y_range = y_max - y_min; Real z_range = z_max - z_min; - Real cube_edge_length = 2 * (r_max); + Real cube_edge_length = 2 * r_max; int x_cubes = std::ceil(x_range / cube_edge_length); int y_cubes = std::ceil(y_range / cube_edge_length); int z_cubes = std::ceil(z_range / cube_edge_length); + + while(true) { + long int n = CubeGrid::NumCubes(x_cubes, y_cubes, z_cubes); + if(n <= 125000000) { + // n is small enough, 125000000 corresponds to a grid of size 500x500x500 + break; + } + // we get too many cubes, let's increase edge length... + cube_edge_length *= 1.1; + x_cubes = std::ceil(x_range / cube_edge_length); + y_cubes = std::ceil(y_range / cube_edge_length); + z_cubes = std::ceil(z_range / cube_edge_length); + } CubeGrid cube_grid(cube_edge_length, x_cubes, y_cubes, z_cubes, x_min, y_min, z_min); @@ -678,7 +701,6 @@ void CalculateASA(const std::vector<Real>& x, int num_cubes = cube_grid.GetNumCubes(); for(int cube_idx = 0; cube_idx < num_cubes; ++cube_idx) { - if(cube_grid.IsInitialized(cube_idx)) { // there is at least one atom! SolveCube(cube_grid, cube_idx, x, y, z, full_radii, diff --git a/modules/mol/alg/src/molck.cc b/modules/mol/alg/src/molck.cc index b3cfe016c342f9edbf87eca2a8b1d3524515c285..1a140b559f6639ee9132d0fec836cdddeb73d39b 100644 --- a/modules/mol/alg/src/molck.cc +++ b/modules/mol/alg/src/molck.cc @@ -5,12 +5,14 @@ #include <ost/conop/rule_based.hh> #include <ost/mol/alg/molck.hh> #include <ost/message.hh> +#include <ost/log.hh> using namespace ost::conop; using namespace ost::mol; +namespace ost{ namespace mol{ namespace alg{ -void ost::mol::alg::MapNonStandardResidues(EntityHandle& ent, CompoundLibPtr lib) { +void MapNonStandardResidues(EntityHandle& ent, CompoundLibPtr lib, bool log_diags) { // TODO: Maybe it is possible to make it in-place operation if(!lib) { @@ -45,18 +47,17 @@ void ost::mol::alg::MapNonStandardResidues(EntityHandle& ent, CompoundLibPtr lib continue; } ResidueHandle dest_res = new_edi.AppendResidue(new_chain,OneLetterCodeToResidueName(compound->GetOneLetterCode()),r->GetNumber()); - ost::mol::alg::CopyResidue(*r,dest_res,new_edi,lib); + CopyResidue(*r,dest_res,new_edi,lib); } } } ent = new_ent; // Since we didn't do it in-place: reprocess the new entity RuleBasedProcessor pr(lib); - pr.Process(ent); + pr.Process(ent, log_diags); } -void ost::mol::alg::RemoveAtoms( - EntityHandle& ent, +void RemoveAtoms(EntityHandle& ent, CompoundLibPtr lib, bool rm_unk_atoms, bool rm_non_std, @@ -73,29 +74,33 @@ void ost::mol::alg::RemoveAtoms( Diagnostics diags; Checker checker(lib, ent, diags); if (rm_zero_occ_atoms) { - std::cerr << "removing atoms with zero occupancy" << std::endl; + LOG_INFO("removing atoms with zero occupancy"); int zremoved=0; AtomHandleList zero_atoms=checker.GetZeroOccupancy(); for (AtomHandleList::const_iterator i=zero_atoms.begin(), e=zero_atoms.end(); i!=e; ++i) { edi.DeleteAtom(*i); zremoved++; } - std::cerr << " --> removed " << zremoved << " atoms with zero occupancy" << std::endl; + std::stringstream ss; + ss << " --> removed " << zremoved << " atoms with zero occupancy"; + LOG_INFO(ss.str()); } if (rm_hyd_atoms) { - std::cerr << "removing hydrogen atoms" << std::endl; + LOG_INFO("removing hydrogen atoms"); int hremoved=0; AtomHandleList hyd_atoms=checker.GetHydrogens(); for (AtomHandleList::const_iterator i=hyd_atoms.begin(), e=hyd_atoms.end(); i!=e; ++i) { edi.DeleteAtom(*i); hremoved++; } - std::cerr << " --> removed " << hremoved << " hydrogen atoms" << std::endl; + std::stringstream ss; + ss << " --> removed " << hremoved << " hydrogen atoms"; + LOG_INFO(ss.str()); } if (rm_oxt_atoms) { - std::cerr << "removing OXT atoms" << std::endl; + LOG_INFO("removing OXT atoms"); int oremoved=0; AtomHandleList atoms=ent.GetAtomList(); for (AtomHandleList::const_iterator i=atoms.begin(), e=atoms.end(); i!=e; ++i) { @@ -104,7 +109,9 @@ void ost::mol::alg::RemoveAtoms( oremoved++; } } - std::cerr << " --> removed " << oremoved << " OXT atoms" << std::endl; + std::stringstream ss; + ss << " --> removed " << oremoved << " OXT atoms"; + LOG_INFO(ss.str()); } checker.CheckForCompleteness(); @@ -113,28 +120,29 @@ void ost::mol::alg::RemoveAtoms( for (Diagnostics::const_diag_iterator j = diags.diags_begin(), e = diags.diags_end(); j != e; ++j) { const Diag* diag=*j; - std::cerr << diag->Format(colored); + std::stringstream ss; + ss << diag->Format(colored); switch (diag->GetType()) { case DIAG_UNK_ATOM: if (rm_unk_atoms) { edi.DeleteAtom(diag->GetAtom(0)); - std::cerr << " --> removed "; + ss << " --> removed "; } break; case DIAG_NONSTD_RESIDUE: if (rm_non_std) { edi.DeleteResidue(diag->GetResidue(0)); - std::cerr << " --> removed "; + ss << " --> removed "; } break; default: break; } - std::cerr << std::endl; + LOG_INFO(ss.str()); } } -void ost::mol::alg::CleanUpElementColumn(EntityHandle& ent, CompoundLibPtr lib){ +void CleanUpElementColumn(EntityHandle& ent, CompoundLibPtr lib){ if(!lib) { throw ost::Error("Require valid compound library!"); @@ -164,20 +172,20 @@ void ost::mol::alg::CleanUpElementColumn(EntityHandle& ent, CompoundLibPtr lib){ } } -void ost::mol::alg::Molck( - ost::mol::EntityHandle& ent, +void Molck(ost::mol::EntityHandle& ent, ost::conop::CompoundLibPtr lib, - const ost::mol::alg::MolckSettings& settings=ost::mol::alg::MolckSettings()){ + const MolckSettings& settings, + bool prune) { if(!lib) { throw ost::Error("Require valid compound library!"); } if (settings.map_nonstd_res) { - ost::mol::alg::MapNonStandardResidues(ent, lib); + MapNonStandardResidues(ent, lib, false); } - ost::mol::alg::RemoveAtoms(ent, - lib, + + RemoveAtoms(ent, lib, settings.rm_unk_atoms, settings.rm_non_std, settings.rm_hyd_atoms, @@ -185,7 +193,13 @@ void ost::mol::alg::Molck( settings.rm_zero_occ_atoms, settings.colored); if (settings.assign_elem) { - ost::mol::alg::CleanUpElementColumn(ent, lib); - } + CleanUpElementColumn(ent, lib); + } + + if(prune) { + ost::mol::XCSEditor edi = ent.EditXCS(); + edi.Prune(); + } } +}}} // ns \ No newline at end of file diff --git a/modules/mol/alg/src/molck.hh b/modules/mol/alg/src/molck.hh index d61e0218678bd07fba0e828fdf0f901dd69977e5..f021ef7ec9b81aaeabf5a46ceaaf8a8bfd84edee 100644 --- a/modules/mol/alg/src/molck.hh +++ b/modules/mol/alg/src/molck.hh @@ -80,7 +80,8 @@ struct MolckSettings{ }; void MapNonStandardResidues(ost::mol::EntityHandle& ent, - ost::conop::CompoundLibPtr lib); + ost::conop::CompoundLibPtr lib, + bool log_diags = true); void RemoveAtoms(ost::mol::EntityHandle& ent, ost::conop::CompoundLibPtr lib, @@ -96,7 +97,8 @@ void CleanUpElementColumn(ost::mol::EntityHandle& ent, void Molck(ost::mol::EntityHandle& ent, ost::conop::CompoundLibPtr lib, - const MolckSettings& settings); + const MolckSettings& settings, + bool prune = true); }}} // namespace diff --git a/modules/mol/base/doc/editors.rst b/modules/mol/base/doc/editors.rst index a5f31e8be85a81d8142861634e8cade02ed29169..cc3846a717103c0d5d4eaf081547e33934821143 100644 --- a/modules/mol/base/doc/editors.rst +++ b/modules/mol/base/doc/editors.rst @@ -360,6 +360,10 @@ The basic functionality of editors is implemented in the EditorBase class. :type atom2: :class:`AtomHandle` :param bond_order: bond order (e.g. 1=single, 2=double, 3=triple) :type bond_order: :class:`int` + + .. method:: Prune() + + Removes residues and chains that don't contain any atoms. Editor for the External Coordinate System diff --git a/modules/mol/base/doc/entity.rst b/modules/mol/base/doc/entity.rst index 4d9f7733d7244a87f0492e567eb16d1a752ab8dc..47b4b9915dde2defcdc8b491b958521758e508f3 100644 --- a/modules/mol/base/doc/entity.rst +++ b/modules/mol/base/doc/entity.rst @@ -401,6 +401,12 @@ The Handle Classes print chain.residues # [B.GLY1, B.GLY4, B.GLY3] print chain.in_sequence # prints false + .. attribute:: residue_count + + Number of residues. Read-only. See :meth:`GetResidueCount`. + + :type: :class:`int` + .. attribute:: atoms Get list of all atoms of this chain. To access a single atom, use @@ -454,7 +460,11 @@ The Handle Classes .. method:: GetResidueList() - See :attr:`residues`. + See :attr:`residues` + + .. method:: GetResidueCount() + + See :attr:`residue_count` .. method:: FindAtom(res_num, atom_name) @@ -1870,7 +1880,11 @@ Residue Numbering Number for a residue. The residue number has a numeric part and an (optional) insertion-code. You can work with this object as if it was an integer and comparison will look first at the numeric part and then the insertion-code. - All access to existing objects is read-only. + All access to existing objects is read-only. Openstructure supports a range + of (-8388608 to 8388607) for the numeric part. However, the PDB format only + supports a range of (-999, 9999). This becomes relevant when a structure is + saved in PDB format where an IOException is raised if the PDB range is not + respected. :param num: Numeric part of residue number. :type num: :class:`int` diff --git a/modules/mol/base/pymod/export_editors.cc b/modules/mol/base/pymod/export_editors.cc index cd7b326fdbcc88581feb24e2af2deb838ff435b0..4bb64bd8ac3eb646176b7bfc6e781a5049253139 100644 --- a/modules/mol/base/pymod/export_editors.cc +++ b/modules/mol/base/pymod/export_editors.cc @@ -267,6 +267,7 @@ void export_Editors() .def("RenumberAllResidues",&EditorBase::RenumberAllResidues) .def("RenumberChain",renumber_chain_a) .def("RenumberChain",renumber_chain_b) + .def("Prune", &EditorBase::Prune) ; void (XCSEditor::*apply_transform1)(const geom::Mat4&) = &XCSEditor::ApplyTransform; diff --git a/modules/mol/base/src/editor_base.cc b/modules/mol/base/src/editor_base.cc index 364ea0be0bbbd07f44073d6e65b9723e1039e25f..fcc5bd3309e845491a498f35b25be5a4dbf9903d 100644 --- a/modules/mol/base/src/editor_base.cc +++ b/modules/mol/base/src/editor_base.cc @@ -298,6 +298,22 @@ TorsionHandle EditorBase::AddTorsion(const String& name, const AtomHandle& a1, a3.Impl(), a4.Impl())); } +void EditorBase::Prune() { + ost::mol::ResidueHandleList res_list = ent_.GetResidueList(); + for(ost::mol::ResidueHandleList::iterator it = res_list.begin(); + it != res_list.end(); ++it) { + if(it->GetAtomCount() == 0) { + this->DeleteResidue(*it); + } + } + ost::mol::ChainHandleList chain_list = ent_.GetChainList(); + for(ost::mol::ChainHandleList::iterator it = chain_list.begin(); + it != chain_list.end(); ++it) { + if(it->GetResidueCount() == 0) { + this->DeleteChain(*it); + } + } +} void EditorBase::UpdateTrace() { diff --git a/modules/mol/base/src/editor_base.hh b/modules/mol/base/src/editor_base.hh index c45036072670958e5eeef9cadb9f23c6def955be..22ca4a5616ca66a7d46e72f99a891c9f4fdd1092 100644 --- a/modules/mol/base/src/editor_base.hh +++ b/modules/mol/base/src/editor_base.hh @@ -328,6 +328,11 @@ public: /// \brief change the name of the atom to the new name void RenameAtom(AtomHandle atom, const String& new_name); + + /// \brief Removes all residues and chains in the attached entity that don't + /// contain any atoms + void Prune(); + protected: EditorBase(const EntityHandle& ent, EditMode mode); void UpdateTrace(); diff --git a/modules/mol/base/src/impl/query_impl.cc b/modules/mol/base/src/impl/query_impl.cc index 586ad080a38ae090d70c1991d3a8bb37d27dbbb5..733999f26ef6b2a3eeee7255c89db667c5825d08 100644 --- a/modules/mol/base/src/impl/query_impl.cc +++ b/modules/mol/base/src/impl/query_impl.cc @@ -191,7 +191,8 @@ QueryToken QueryLexer::LexToken() { if (isdigit(current_char) || current_char=='-') { return this->LexNumericToken(); } - if (isalpha(current_char) || current_char=='?' || current_char=='*') { + if (isalpha(current_char) || current_char=='?' || current_char=='*' || + current_char=='_') { return this->LexIdentOrStringToken(); } switch (current_char) { diff --git a/modules/mol/base/tests/test_entity.cc b/modules/mol/base/tests/test_entity.cc index 89b6349bea4682f27d5f9aa148de7d3a1f63913f..cab0efe6948fada6faeeeaa85f99ba57d1715856 100644 --- a/modules/mol/base/tests/test_entity.cc +++ b/modules/mol/base/tests/test_entity.cc @@ -441,4 +441,57 @@ BOOST_AUTO_TEST_CASE(minmax) BOOST_CHECK_EQUAL(minmax.second, 7.0); } +BOOST_AUTO_TEST_CASE(prune) { + EntityHandle ent=mol::CreateEntity(); + XCSEditor edi = ent.EditXCS(); + + ChainHandle ch1 = edi.InsertChain("A"); + ChainHandle ch2 = edi.InsertChain("B"); + + ResidueHandle res1_1 = edi.AppendResidue(ch1, "DUMMY"); + ResidueHandle res1_2 = edi.AppendResidue(ch1, "DUMMY"); + AtomHandle atom1_1_1 = edi.InsertAtom(res1_1, "X", geom::Vec3(1,2,3), "C"); + AtomHandle atom1_2_1 = edi.InsertAtom(res1_2, "X", geom::Vec3(1,2,3), "C"); + + ResidueHandle res2_1 = edi.AppendResidue(ch2, "DUMMY"); + ResidueHandle res2_2 = edi.AppendResidue(ch2, "DUMMY"); + AtomHandle atom2_1_1 = edi.InsertAtom(res2_1, "X", geom::Vec3(1,2,3), "C"); + AtomHandle atom2_2_1 = edi.InsertAtom(res2_2, "X", geom::Vec3(1,2,3), "C"); + + BOOST_CHECK_EQUAL(ent.GetChainCount(), 2); + BOOST_CHECK_EQUAL(ent.GetResidueCount(), 4); + BOOST_CHECK_EQUAL(ent.GetAtomCount(), 4); + + // shouldn't do anything + edi.Prune(); + BOOST_CHECK_EQUAL(ent.GetChainCount(), 2); + BOOST_CHECK_EQUAL(ent.GetResidueCount(), 4); + BOOST_CHECK_EQUAL(ent.GetAtomCount(), 4); + + // remove one atom, the subsequent prune call should then remove the + // according residue but the number of chains should remain the same + edi.DeleteAtom(atom1_1_1); + edi.Prune(); + BOOST_CHECK_EQUAL(ent.GetChainCount(), 2); + BOOST_CHECK_EQUAL(ent.GetResidueCount(), 3); + BOOST_CHECK_EQUAL(ent.GetAtomCount(), 3); + + // remove the second atom from the first chain, the whole chain should then + // be removed by Prune + edi.DeleteAtom(atom1_2_1); + edi.Prune(); + BOOST_CHECK_EQUAL(ent.GetChainCount(), 1); + BOOST_CHECK_EQUAL(ent.GetResidueCount(), 2); + BOOST_CHECK_EQUAL(ent.GetAtomCount(), 2); + + // remove both residues from the second chain. The entity should be empty + // afterwards + edi.DeleteResidue(res2_1); + edi.DeleteResidue(res2_2); + edi.Prune(); + BOOST_CHECK_EQUAL(ent.GetChainCount(), 0); + BOOST_CHECK_EQUAL(ent.GetResidueCount(), 0); + BOOST_CHECK_EQUAL(ent.GetAtomCount(), 0); +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/seq/alg/doc/seqalg.rst b/modules/seq/alg/doc/seqalg.rst index ca98d1b654cf74d890c0257f3f9087cf12a794d7..8f1abfd84218c42b3a66235616cf4038374b3666 100644 --- a/modules/seq/alg/doc/seqalg.rst +++ b/modules/seq/alg/doc/seqalg.rst @@ -9,8 +9,17 @@ Algorithms for Alignments .. function:: MergePairwiseAlignments(pairwise_alns, ref_seq) + :param pairwise_alns: A list of pairwise alignments + :type pairwise_alns: :class:`~ost.seq.AlignmentList` + + :param ref_seq: The reference sequence + :type ref_seq: :class:`~ost.seq.SequenceHandle` + + :returns: The merged alignment + :rtype: :class:`~ost.seq.AlignmentHandle` + Merge a list of pairwise alignments into a multiple sequence alignments. This - function uses the reference sequence as the anchor and inserts gaps where + function uses the reference sequence as the anchor and inserts gaps where needed. This is also known as the *star method*. The resulting multiple sequence alignment provides a simple way to map between diff --git a/tools/molck/main.cc b/tools/molck/main.cc index ef2d77c207213957a20d2d2d460da74fa0280b73..8507a1895b2d6f13d2aeecec94a826a392ce0dd0 100644 --- a/tools/molck/main.cc +++ b/tools/molck/main.cc @@ -11,6 +11,7 @@ #include <ost/io/mol/mmcif_reader.hh> #include <ost/io/io_exception.hh> #include <ost/mol/alg/molck.hh> +#include <ost/log.hh> #if defined(__APPLE__) #include <mach-o/dyld.h> #endif @@ -47,7 +48,7 @@ const char* USAGE= void usage() { - std::cerr << USAGE << std::endl; + LOG_INFO(USAGE); exit(0); } @@ -69,12 +70,14 @@ EntityHandle load_x(const String& file, const IOProfile& profile) reader.Import(ent); return ent; } - std::cerr << "ERROR: '" << file << "' does not contain any ATOM records. " - << "Are you sure this is a PDB file?" << std::endl; + std::stringstream ss; + ss << "ERROR: '" << file << "' does not contain any ATOM records. " + << "Are you sure this is a PDB file?"; + LOG_INFO(ss.str()); return EntityHandle(); } } catch (std::exception& e) { - std::cerr << "ERROR: " << e.what() << std::endl; + LOG_ERROR("ERROR: " << e.what()); return EntityHandle(); } } @@ -86,7 +89,7 @@ ost::conop::CompoundLibPtr load_compound_lib(const String& custom_path) if (fs::exists(custom_path)) { return ost::conop::CompoundLib::Load(custom_path); } else { - std::cerr << "Could not find compounds.chemlib at the provided location, trying other options" << std::endl; + LOG_INFO("Could not find compounds.chemlib at the provided location, trying other options"); } } if (fs::exists("compounds.chemlib")) { @@ -105,8 +108,8 @@ ost::conop::CompoundLibPtr load_compound_lib(const String& custom_path) exe_path = std::string( result, (count > 0) ? count : 0 ); #endif if (exe_path.empty()) { - std::cerr << "Could not determine the path of the molck executable. Will only " - "look for compounds.chemlib in the current working directory" << std::endl; + LOG_INFO("Could not determine the path of the molck executable. Will only " + "look for compounds.chemlib in the current working directory"); } else { fs::path path_and_exe(exe_path); fs::path path_only=path_and_exe.branch_path(); @@ -120,7 +123,7 @@ ost::conop::CompoundLibPtr load_compound_lib(const String& custom_path) } } if (!lib) { - std::cerr << "Could not load compounds.chemlib" << std::endl; + LOG_ERROR("Could not load compounds.chemlib"); exit(-1); } return ost::conop::CompoundLibPtr(); @@ -129,6 +132,9 @@ ost::conop::CompoundLibPtr load_compound_lib(const String& custom_path) int main(int argc, char *argv[]) { + // setup logging + ost::Logger::Instance().PushVerbosityLevel(ost::Logger::INFO); + if (argc<2) { usage(); } @@ -166,7 +172,7 @@ int main(int argc, char *argv[]) options(desc).positional(p).run(), vm); } catch (std::exception& e) { - std::cerr << e.what() << std::endl; + LOG_ERROR(e.what()); usage(); exit(-1); } @@ -206,7 +212,9 @@ int main(int argc, char *argv[]) } else if (rms[i] == StringRef("zeroocc", 7)) { settings.rm_zero_occ_atoms = true; } else { - std::cerr << "unknown value to remove '" << rms[i] << "'" << std::endl; + std::stringstream ss; + ss << "unknown value to remove '" << rms[i] << "'"; + LOG_ERROR(ss.str()); usage(); exit(-1); } @@ -246,8 +254,7 @@ int main(int argc, char *argv[]) try { fs::path out_path(output_blueprint_string_copy); if (out_path.has_parent_path() && !exists(out_path.parent_path())) { - std::cerr << "Output path does not exist: " - << output_blueprint_string_copy << std::endl; + LOG_ERROR("Output path does not exist: " + output_blueprint_string_copy); exit(-1); } } catch (std::exception& e) { @@ -255,15 +262,14 @@ int main(int argc, char *argv[]) size_t perden = String(e.what()).find("Permission denied"); if (perden != String::npos) { - std::cerr << "Cannot write into output directory: " - << output_blueprint_string_copy << std::endl; + LOG_ERROR("Cannot write into output directory: " + output_blueprint_string_copy); exit(-1); } else { - std::cerr << e.what() << std::endl; + LOG_ERROR(e.what()); exit(-1); } } - std::cerr << "Writing out file: " << output_blueprint_string_copy << std::endl; + LOG_INFO("Writing out file: " + output_blueprint_string_copy); PDBWriter writer(output_blueprint_string_copy, prof); writer.Write(ent); }