diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 8227297675eac5e2c0793ad9021fc4ed321a6cb2..01f62d91c401707e5819883e1d5695510574e301 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,7 +1,7 @@ -Changes in Release 2.x +Changes in Release 2.1.0 -------------------------------------------------------------------------------- - * Use the newer voronota implementation as default in CAD-score binding + * Use the newer Voronota implementation as default in CAD-score binding * Added HHblits3 support - HHblits2 still supported but considered deprecated * HMMScore: HMM-HMM alignment score as it is optimized in HHblits searches * Support for the new carbohydrate-extension in the mmCIF file format diff --git a/CMakeLists.txt b/CMakeLists.txt index 91af96562fbee8cccd2d9e6c0ae957bdbfddd485..6bd006d7804dd0bea7bb1609c9d89f40c78c5cac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ cmake_policy(SET CMP0060 NEW) project(OpenStructure CXX C) set (CMAKE_EXPORT_COMPILE_COMMANDS 1) set (OST_VERSION_MAJOR 2) -set (OST_VERSION_MINOR 0) +set (OST_VERSION_MINOR 1) set (OST_VERSION_PATCH 0) set (OST_VERSION_STRING ${OST_VERSION_MAJOR}.${OST_VERSION_MINOR}.${OST_VERSION_PATCH} ) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/cmake_support) diff --git a/docker/Dockerfile b/docker/Dockerfile index 63df3f9e4267df1c596a16410463708a6463792c..b0863e7838d088bb47e9c166dd79784fa6b014cb 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,7 +2,7 @@ FROM ubuntu:18.04 # ARGUMENTS ########### -ARG OPENSTRUCTURE_VERSION="2.0.0" +ARG OPENSTRUCTURE_VERSION="2.1.0" ARG SRC_FOLDER="/usr/local/src" ARG CPUS_FOR_MAKE=2 ARG MSMS_VERSION="2.6.1" diff --git a/modules/bindings/pymod/hhblits.py b/modules/bindings/pymod/hhblits.py index 0ee1f3ecad62fccb5cd77905a9500f46b01426d1..3ca019bef6e83a97a392ad73f7cbd2a9a9548d09 100644 --- a/modules/bindings/pymod/hhblits.py +++ b/modules/bindings/pymod/hhblits.py @@ -20,9 +20,9 @@ def GetHHblitsVersionString(): # regular expression 
in the form # HHblits, whatever except newline, x.y.z # where x.y.z are the version numerals - version_line = re.search('HHblits[^\n]+\d+\.\d+\.\d+', proc.stdout.decode()) + version_line = re.search(r'HHblits[^\n]+\d+\.\d+\.\d+', proc.stdout.decode()) if version_line is not None: - version = re.search('\d+\.\d+\.\d+', version_line.group()) + version = re.search(r'\d+\.\d+\.\d+', version_line.group()) if version is not None: version_string = version.group() diff --git a/modules/doc/install.rst b/modules/doc/install.rst index ed8495c104aa747330f3696c63b7646c69d968ff..4dbfffd133bcfa2902caf828d37bf29af2da9f6f 100644 --- a/modules/doc/install.rst +++ b/modules/doc/install.rst @@ -300,13 +300,13 @@ observed for OpenMM versions 6.1 until 7.1.1 when compiling with gcc versions >= from source. -**Ubuntu 18.04 LTS / Debian 10.3.0 with GUI** +**Ubuntu 20.04 LTS / Debian 10 with GUI** All the dependencies can be installed from the package manager as follows: .. code-block:: bash - sudo apt-get install cmake g++ sip-dev libtiff-dev libfftw3-dev libeigen3-dev \ + sudo apt-get install cmake g++ libtiff-dev libfftw3-dev libeigen3-dev \ libpng-dev python3-all python3-pyqt5 libboost-all-dev \ qt5-qmake qtbase5-dev libpng-dev libsqlite3-dev @@ -318,11 +318,11 @@ version of OpenStructure. .. code-block:: bash - cmake . -DPYTHON_LIBRARIES=/usr/lib/x86_64-linux-gnu/libpython3.6m.so \ + cmake . -DPYTHON_LIBRARIES=/usr/lib/x86_64-linux-gnu/libpython3.8.so \ -DOPTIMIZE=ON Be careful at -DPYTHON_LIBRARIES, Debian 10 comes with Python 3.7 so that needs -to be substituted. +to be substituted (libpython3.8.so -> libpython3.7m.so). 
**macOS (Catalina) with Homebrew** diff --git a/modules/gui/pymod/__init__.py b/modules/gui/pymod/__init__.py index 78c1617af5a25907fa2f21472e59fe4ec4aa45cb..fc8d1494c41b4daec849c7af1f33935119f3516a 100644 --- a/modules/gui/pymod/__init__.py +++ b/modules/gui/pymod/__init__.py @@ -18,8 +18,6 @@ #------------------------------------------------------------------------------ import ost.gui.trajectory_viewer from ._ost_gui import * -import sip - ## \brief Opens a DataViewer # \sa \example fft_li.py "View Fourier Transform Example" \sa \ref modulate_image.py "Modulate Image Example" diff --git a/modules/gui/pymod/export_input.cc b/modules/gui/pymod/export_input.cc index 06846fc98ace3c57af3d7435f3bbf72fe7dc6687..8aec22c2b7c5717905dfffaedb06e96e09701284 100644 --- a/modules/gui/pymod/export_input.cc +++ b/modules/gui/pymod/export_input.cc @@ -30,8 +30,8 @@ namespace { object spnav_get_instance() { - static object sip_module=import("sip"); static object pyqt5_module=import("PyQt5.QtCore"); + static object sip_module=import("sip"); SpnavInput* si = SpnavInput::Instance(); if(si->isValid()) { return ost::gui::get_py_qobject<SpnavInput>(si); diff --git a/modules/gui/pymod/init_context_menu.py b/modules/gui/pymod/init_context_menu.py index 9693ef5c8db13ef1dca585a17614eb77650aaa6b..a241253f1d0e1886e2c9214b2949236591acff53 100644 --- a/modules/gui/pymod/init_context_menu.py +++ b/modules/gui/pymod/init_context_menu.py @@ -2,8 +2,6 @@ import platform from PyQt5 import QtCore, QtWidgets -import sip - from ost import geom, gfx, gui, seq from ost import settings from ost import LogError, mol diff --git a/modules/gui/pymod/init_menubar.py b/modules/gui/pymod/init_menubar.py index c245f0f3ceccfd682d40a22d7df94562be2feb9b..2c8f05c4d8f92c834fbe85591c496f5c3af7d7b8 100644 --- a/modules/gui/pymod/init_menubar.py +++ b/modules/gui/pymod/init_menubar.py @@ -19,7 +19,6 @@ import sys from ost import gui -import sip from ost import gfx import ost diff --git 
a/modules/gui/pymod/scene/file_loader.py b/modules/gui/pymod/scene/file_loader.py index adf747d10b27ff9b7c19b682fb9aa86d0c33f03f..567450218fa9d77cb213e7a7b799dc29a9a73ac4 100644 --- a/modules/gui/pymod/scene/file_loader.py +++ b/modules/gui/pymod/scene/file_loader.py @@ -20,9 +20,7 @@ from ost import gui from ost import info import ost -import sip import re - from PyQt5 import QtCore, QtGui, QtNetwork from ost.gui import FileLoader diff --git a/modules/gui/pymod/scene/init_inspector.py b/modules/gui/pymod/scene/init_inspector.py index 99fea817bfbe79c96479494e8b040c46c4c19a37..f250b80ea0b511733fc16a32ca988040e1cf16b2 100644 --- a/modules/gui/pymod/scene/init_inspector.py +++ b/modules/gui/pymod/scene/init_inspector.py @@ -17,8 +17,6 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #------------------------------------------------------------------------------ -import sip - from ost import gui from ost import gfx from PyQt5 import QtCore diff --git a/modules/gui/pymod/scene/inspector_widget.py b/modules/gui/pymod/scene/inspector_widget.py index f1d98d3778ef603cf3cc9849b3a01661b60fba16..5ae8b872508c8a7e77d91626c67802c43950d39d 100644 --- a/modules/gui/pymod/scene/inspector_widget.py +++ b/modules/gui/pymod/scene/inspector_widget.py @@ -19,7 +19,6 @@ import sys from ost import gui -import sip from ost import gfx import ost import os diff --git a/modules/gui/pymod/scene/scene_selection_helper.py b/modules/gui/pymod/scene/scene_selection_helper.py index 91686daf2270378337e2935cb8ad75c733854e7f..0c2d8d09aa127d3241c7019c4f17ff25e28cc727 100644 --- a/modules/gui/pymod/scene/scene_selection_helper.py +++ b/modules/gui/pymod/scene/scene_selection_helper.py @@ -19,7 +19,6 @@ import sys from ost import gui -import sip from ost import gfx import ost import os diff --git a/modules/gui/pymod/sip_handler.hh b/modules/gui/pymod/sip_handler.hh index c2f9c189a1a2a4710e88e125360c8e36b498a244..5a37b312ac79836b04c5cfde7ae0fb82a71ffe71 100644 --- 
a/modules/gui/pymod/sip_handler.hh +++ b/modules/gui/pymod/sip_handler.hh @@ -37,8 +37,8 @@ namespace ost { namespace gui { template <class O> object get_py_qobject(O* cpp_object) { if (cpp_object != NULL){ - static object sip_module=import("sip"); static object pyqt5_module=import("PyQt5.QtCore"); + static object sip_module=import("sip"); QObject* qobject = qobject_cast<QObject*>(cpp_object); unsigned long addr = reinterpret_cast<unsigned long>(qobject); object py_qobject = pyqt5_module.attr("QObject"); @@ -55,6 +55,7 @@ template <class O> O* get_cpp_qobject(object py_object) if(PyObject_HasAttrString(py_object.ptr(), "qobject")){ py_object = py_object.attr("qobject"); } + static object pyqt5_module=import("PyQt5.QtCore"); static object sip_module=import("sip"); unsigned long addr = extract<unsigned long>(sip_module.attr("unwrapinstance")(py_object)); if(addr){ diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index f0ad0c7788c5d0faa585c0a2389fad07d3be61e3..41a6ed5f2e59c0c13e915d1bc527d2de5469b498 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -800,7 +800,7 @@ of the annotation available. See :attr:`operationsintervalls` - .. function:: PDBize(asu, seqres=None, min_polymer_size=10, transformation=False) + .. function:: PDBize(asu, seqres=None, min_polymer_size=None, transformation=False, peptide_min_size=10, nucleicacid_min_size=10, saccharide_min_size=10) Returns the biological assembly (bio unit) for an entity. The new entity created is well suited to be saved as a PDB file. Therefore the function @@ -812,7 +812,8 @@ of the annotation available. - Each polymer gets its own chain, named A-Z 0-9 a-z. - The description of non-polymer chains will be put into a generic string property called description on the residue level. 
- - Ligands that resemble a polymer but have less than *min_polymer_size* + - Ligands that resemble a polymer but have less than *min_polymer_size* / + *peptide_min_size* / *nucleicacid_min_size* / *saccharide_min_size* residues are assigned the same numeric residue number. The residues are distinguished by insertion code. - Sometimes bio units exceed the coordinate system storable in a PDB file. @@ -832,11 +833,21 @@ of the annotation available. :type seqres: :class:`~ost.seq.SequenceList` :param min_polymer_size: The minimal number of residues a polymer needs to get its own chain. Everything below that number will be sorted into the - ligand chain. + ligand chain. Overrides *peptide_min_size*, *nucleicacid_min_size* and + *saccharide_min_size* if set to a value different than None. :type min_polymer_size: int :param transformation: If set, return the transformation matrix used to move the bounding box of the bio unit to the lower left corner. :type transformation: :class:`bool` + :param peptide_min_size: Minimal size to get an individual chain for a + polypeptide. Is overridden by *min_polymer_size*. + :type peptide_min_size: :class:`int` + :param nucleicacid_min_size: Minimal size to get an individual chain for a + polynucleotide. Is overridden by *min_polymer_size*. + :type nucleicacid_min_size: :class:`int` + :param saccharide_min_size: Minimal size to get an individual chain for an + oligosaccharide or polysaccharide. Is overridden by *min_polymer_size*. + :type saccharide_min_size: :class:`int` .. class:: MMCifInfoStructDetails diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index cf8063d011627d477fd7401d004d7b6664a621f7..a0a29a6a43a98691faf1f1b54650f1fff5a0600a 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -358,9 +358,15 @@ def LoadMMCIF(filename, fault_tolerant=None, calpha_only=None, profile='DEFAULT' # arguement is the usual 'self'. 
# documentation for this function was moved to mmcif.rst, # MMCifInfoBioUnit.PDBize, since this function is not included in SPHINX. -def _PDBize(biounit, asu, seqres=None, min_polymer_size=10, - transformation=False): - pdbizer = mol.alg.PDBize(min_polymer_size=min_polymer_size) +def _PDBize(biounit, asu, seqres=None, min_polymer_size=None, + transformation=False, peptide_min_size=10, nucleicacid_min_size=10, + saccharide_min_size=10): + if min_polymer_size is not None: + pdbizer = mol.alg.PDBize(min_polymer_size=min_polymer_size) + else: + pdbizer = mol.alg.PDBize(peptide_min_size=peptide_min_size, + nucleicacid_min_size=nucleicacid_min_size, + saccharide_min_size=saccharide_min_size) chains = biounit.GetChainList() c_intvls = biounit.GetChainIntervalList() diff --git a/modules/io/src/mol/chemdict_parser.cc b/modules/io/src/mol/chemdict_parser.cc index 9ff88e2f0a86ab236fc8074b59f392e8e92c931b..d392692c7fbc0cca9114d8051519d0ce51ace003 100644 --- a/modules/io/src/mol/chemdict_parser.cc +++ b/modules/io/src/mol/chemdict_parser.cc @@ -184,14 +184,19 @@ void ChemdictParser::InitTypeMap() tm_["L-PEPTIDE NH3 AMINO TERMINUS"]=mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING); tm_["L-GAMMA-PEPTIDE, C-DELTA LINKING"]=mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING); tm_["L-BETA-PEPTIDE, C-GAMMA LINKING"]=mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING); + tm_["D-PEPTIDE COOH CARBOXY TERMINUS"]=mol::ChemClass(mol::ChemClass::D_PEPTIDE_LINKING); tm_["D-PEPTIDE NH3 AMINO TERMINUS"]=mol::ChemClass(mol::ChemClass::D_PEPTIDE_LINKING); tm_["D-BETA-PEPTIDE, C-GAMMA LINKING"]=mol::ChemClass(mol::ChemClass::D_PEPTIDE_LINKING); tm_["D-GAMMA-PEPTIDE, C-DELTA LINKING"]=mol::ChemClass(mol::ChemClass::D_PEPTIDE_LINKING); tm_["L-SACCHARIDE, ALPHA LINKING"]=mol::ChemClass(mol::ChemClass::L_SACCHARIDE); + tm_["L-SACCHARIDE, BETA LINKING"]=mol::ChemClass(mol::ChemClass::L_SACCHARIDE); tm_["L-SACCHARIDE 1,4 AND 1,4 LINKING"]=mol::ChemClass(mol::ChemClass::L_SACCHARIDE); + tm_["L-SACCHARIDE 
1,4 AND 1,6 LINKING"]=mol::ChemClass(mol::ChemClass::L_SACCHARIDE); tm_["D-SACCHARIDE 1,4 AND 1,4 LINKING"]=mol::ChemClass(mol::ChemClass::D_SACCHARIDE); tm_["L-SACCHARIDE"]=mol::ChemClass(mol::ChemClass::L_SACCHARIDE); tm_["D-SACCHARIDE"]=mol::ChemClass(mol::ChemClass::D_SACCHARIDE); + tm_["D-SACCHARIDE, BETA LINKING"]=mol::ChemClass(mol::ChemClass::D_SACCHARIDE); + tm_["D-SACCHARIDE, ALPHA LINKING"]=mol::ChemClass(mol::ChemClass::D_SACCHARIDE); tm_["SACCHARIDE"]=mol::ChemClass(mol::ChemClass::SACCHARIDE); tm_["D-PEPTIDE LINKING"]=mol::ChemClass(mol::ChemClass::D_PEPTIDE_LINKING); tm_["L-PEPTIDE LINKING"]=mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING); @@ -212,6 +217,7 @@ void ChemdictParser::InitTypeMap() tm_["RNA OH 5 PRIME TERMINUS"]=mol::ChemClass(mol::ChemClass::RNA_LINKING); tm_["?"]=mol::ChemClass(mol::ChemClass::UNKNOWN); tm_["WATER"]=mol::ChemClass(mol::ChemClass::WATER); + tm_["OTHER"]=mol::ChemClass(mol::ChemClass::UNKNOWN); } void ChemdictParser::InitPDBXTypeMap() diff --git a/modules/mol/alg/pymod/wrap_mol_alg.cc b/modules/mol/alg/pymod/wrap_mol_alg.cc index fd5b6d94de273dc57317e270d58596534fb5131f..d36877c0fb9024c9a200ca0e327fd9b68f900cbd 100644 --- a/modules/mol/alg/pymod/wrap_mol_alg.cc +++ b/modules/mol/alg/pymod/wrap_mol_alg.cc @@ -439,7 +439,10 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) class_<mol::alg::PDBize>("PDBize", - init<int>(arg("min_polymer_size")=10)) + init<int,int,int>((arg("peptide_min_size"), + arg("nucleicacid_min_size"), + arg("saccharide_min_size")))) + .def(init<int>(arg("min_polymer_size")=10)) .def("Add", &mol::alg::PDBize::Add, (arg("asu"), arg("transformations"), arg("seqres"))) .def("Finish", &mol::alg::PDBize::Finish, arg("shift_to_fit")=true) diff --git a/modules/mol/alg/src/pdbize.cc b/modules/mol/alg/src/pdbize.cc index 84853c6f0d20502c94d5bfb38f110ff3abf7b46d..c53749ff4bb06dd589940a1a057536a36b6a7558 100644 --- a/modules/mol/alg/src/pdbize.cc +++ b/modules/mol/alg/src/pdbize.cc @@ -76,12 +76,19 @@ void 
PDBize::Add(EntityView asu, const geom::Mat4List& transforms, e2 =asu.GetChainList().end(); j != e2; ++j) { ChainView chain = *j; int chain_length = chain.GetResidueCount(); - if (chain_length < min_polymer_size_ && seqres.IsValid()) { + if (((chain.IsPolypeptide() && chain_length < peptide_min_size_) || + (chain.IsPolynucleotide() && chain_length < nucleicacid_min_size_) || + ((chain.IsOligosaccharide() || chain.IsPolysaccharide()) && + chain_length < saccharide_min_size_)) && + seqres.IsValid()) { seq::SequenceHandle s = seqres.FindSequence(chain.GetName()); if (s.IsValid()) chain_length = s.GetLength(); } - if (chain.IsPolymer() && chain_length >= min_polymer_size_) { + if ((chain.IsPolypeptide() && chain_length >= peptide_min_size_) || + (chain.IsPolynucleotide() && chain_length >= nucleicacid_min_size_) || + ((chain.IsOligosaccharide() || chain.IsPolysaccharide()) && + chain_length >= saccharide_min_size_)) { if (*curr_chain_name_ == 0) { throw std::runtime_error("running out of chain names"); } diff --git a/modules/mol/alg/src/pdbize.hh b/modules/mol/alg/src/pdbize.hh index 1ddf66fc5d047d882563b5e0fe7e61dc989f8c86..79ce6744d9baaed6b2ec4dd9d9ad5ee6215929bb 100644 --- a/modules/mol/alg/src/pdbize.hh +++ b/modules/mol/alg/src/pdbize.hh @@ -35,7 +35,19 @@ extern const char* WATER_CHAIN_NAME; class DLLEXPORT_OST_MOL_ALG PDBize { public: explicit PDBize(int min_polymer_size=10): - min_polymer_size_(min_polymer_size), ent_(mol::CreateEntity()), + peptide_min_size_(min_polymer_size), + nucleicacid_min_size_(min_polymer_size), + saccharide_min_size_(min_polymer_size), ent_(mol::CreateEntity()), + curr_chain_name_(POLYPEPTIDE_CHAIN_NAMES), needs_adjustment_(false), + last_rnum_(0) + {} + + explicit PDBize(int peptide_min_size, + int nucleicacid_min_size, + int saccharide_min_size): + peptide_min_size_(peptide_min_size), + nucleicacid_min_size_(nucleicacid_min_size), + saccharide_min_size_(saccharide_min_size), ent_(mol::CreateEntity()), 
curr_chain_name_(POLYPEPTIDE_CHAIN_NAMES), needs_adjustment_(false), last_rnum_(0) {} @@ -45,7 +57,9 @@ public: EntityHandle Finish(bool shift_to_fit=true); private: - int min_polymer_size_; + int peptide_min_size_; + int nucleicacid_min_size_; + int saccharide_min_size_; EntityHandle ent_; ChainHandle ligand_chain_; ChainHandle water_chain_; diff --git a/modules/mol/alg/tests/test_pdbize.py b/modules/mol/alg/tests/test_pdbize.py index 5a180b95267938eab7b1b2240ef3a5848fb02fbe..c9f67643a04877d3132de8d5f7a054830a389951 100644 --- a/modules/mol/alg/tests/test_pdbize.py +++ b/modules/mol/alg/tests/test_pdbize.py @@ -4,7 +4,6 @@ import os import random class TestPDBize(unittest.TestCase): - def test_numbers_water_molecules_with_ins_codes(self): m = mol.CreateEntity() e = m.EditXCS(mol.BUFFERED_EDIT) @@ -94,6 +93,187 @@ class TestPDBize(unittest.TestCase): self.assertEqual(residues[26].number.num, 2) self.assertEqual(residues[26].number.ins_code, '\0') + def _CheckMinSize(self, ost_ent, seq_list, chn_nm_lst, **kwargs): + """Check effects of the *_min_size parameter. + + :param ost_ent: OST entity to be PDBized. + :type ost_ent: :class:`~ost.mol.EntityHandle` + :param seq_list: Sequence list for the chains in ost_ent. + :type seq_list: :class:`~ost.seq.SequenceList` + :param chn_nm_lst: List of expected chain names in PDBized entity. 
+ :type chn_nm_lst: :class:`list` of :class:`str` + """ + if "saccharide_min_size" not in kwargs: + kwargs["saccharide_min_size"] = 10 + if "nucleicacid_min_size" not in kwargs: + kwargs["nucleicacid_min_size"] = 10 + if "peptide_min_size" not in kwargs: + kwargs["peptide_min_size"] = 10 + transformations = geom.Mat4List() + transformations.append(geom.Mat4()) + pdbizer = mol.alg.PDBize(**kwargs) + pdbizer.Add(ost_ent.Select(''), transformations, seq_list) + pdbized = pdbizer.Finish() + self.assertEqual(len(pdbized.chains), len(chn_nm_lst)) + for i in range(0, len(chn_nm_lst)): + self.assertEqual(pdbized.chains[i].name, chn_nm_lst[i]) + return pdbized + + def test_peptide_min_size(self): + """Make sure the peptide_min_size parameter works, place a polypeptide in + chain '_'. + """ + m = mol.CreateEntity() + e = m.EditXCS(mol.BUFFERED_EDIT) + c = e.InsertChain("A"); + e.SetChainType(c, mol.CHAINTYPE_POLY_PEPTIDE_L) + for i in range(10): + e.AppendResidue(c, "ALA") + seqs = seq.CreateSequenceList() + seqs.AddSequence(seq.CreateSequence("LotsOfAlanin", "AAAAAAAAAA")) + + # test that small peptide chains end up in the ligand chain "_" + self._CheckMinSize(m, seqs, ["_"], peptide_min_size=11) + + # test again with two small peptide chains + c = e.InsertChain("B"); + e.SetChainType(c, mol.CHAINTYPE_POLY_PEPTIDE_L) + for i in range(15): + e.AppendResidue(c, "ALA") + seqs.AddSequence(seq.CreateSequence("MoreAlanin", "AAAAAAAAAAAAAAA")) + self._CheckMinSize(m, seqs, ["_"], peptide_min_size=16) + + # test one peptide in ligand chain, second as polymer chain + self._CheckMinSize(m, seqs, ["_", "A"], peptide_min_size=11) + + # actually disabling min. polymer size + self._CheckMinSize(m, seqs, ["A", "B"], peptide_min_size=0) + + def test_nucleicacid_min_size(self): + """Make sure the nucleicacid_min_size parameter works, place a + polynucleotide in chain '_'. 
+ """ + m = mol.CreateEntity() + e = m.EditXCS(mol.BUFFERED_EDIT) + c = e.InsertChain("A"); + e.SetChainType(c, mol.CHAINTYPE_POLY_DN) + for i in range(10): + e.AppendResidue(c, "DA") + seqs = seq.CreateSequenceList() + seqs.AddSequence(seq.CreateSequence("LotsOfAdenine", "AAAAAAAAAA")) + + # test that small nucleotide chains end up in the ligand chain "_" + self._CheckMinSize(m, seqs, ["_"], nucleicacid_min_size=11) + + # test again with two small nucleic acid chains + c = e.InsertChain("B"); + e.SetChainType(c, mol.CHAINTYPE_POLY_DN) + for i in range(15): + e.AppendResidue(c, "DA") + seqs.AddSequence(seq.CreateSequence("MoreAdenine", "AAAAAAAAAAAAAAA")) + self._CheckMinSize(m, seqs, ["_"], nucleicacid_min_size=16) + + # test one nucleic acid in ligand chain, second as polymer chain + self._CheckMinSize(m, seqs, ["_", "A"], nucleicacid_min_size=11) + + # actually disabling min. polymer size + self._CheckMinSize(m, seqs, ["A", "B"], nucleicacid_min_size=0) + + def test_saccharide_min_size(self): + """Make sure the saccharide_min_size parameter works, place an + oligosaccharide in chain '_'. + """ + m = mol.CreateEntity() + e = m.EditXCS(mol.BUFFERED_EDIT) + c = e.InsertChain("A"); + e.SetChainType(c, mol.CHAINTYPE_OLIGOSACCHARIDE) + for i in range(10): + e.AppendResidue(c, "NAG") + seqs = seq.CreateSequenceList() + + # test that small oligosaccharides end up in the ligand chain "_" + self._CheckMinSize(m, seqs, ["_"], saccharide_min_size=11) + + # test again with two small oligosaccharide chains + c = e.InsertChain("B"); + e.SetChainType(c, mol.CHAINTYPE_OLIGOSACCHARIDE) + for i in range(15): + e.AppendResidue(c, "NAG") + self._CheckMinSize(m, seqs, ["_"], saccharide_min_size=16) + + # test one oligosaccharide in ligand chain, second as polymer chain + self._CheckMinSize(m, seqs, ["_", "A"], saccharide_min_size=11) + + # actually disabling min. 
polymer size + + def test_peptide_nucleicacid_saccharide_min_sizes(self): + """Make sure that all three thresholds play well together. + """ + m = mol.CreateEntity() + e = m.EditXCS(mol.BUFFERED_EDIT) + c = e.InsertChain("A"); + e.SetChainType(c, mol.CHAINTYPE_POLY_PEPTIDE_L) + for i in range(10): + e.AppendResidue(c, "ALA") + seqs = seq.CreateSequenceList() + seqs.AddSequence(seq.CreateSequence("LotsOfAlanin", "AAAAAAAAAA")) + c = e.InsertChain("B"); + e.SetChainType(c, mol.CHAINTYPE_POLY_DN) + for i in range(10): + e.AppendResidue(c, "DA") + seqs.AddSequence(seq.CreateSequence("LotsOfAdenine", "AAAAAAAAAA")) + c = e.InsertChain("C"); + e.SetChainType(c, mol.CHAINTYPE_OLIGOSACCHARIDE) + for i in range(10): + e.AppendResidue(c, "NAG") + + # Check branched entities can be abandoned in the ligand chain while + # peptides and nucleic acids live in their own chains. + pdbized = self._CheckMinSize(m, seqs, ["A", "B", "_"], + saccharide_min_size=11, + peptide_min_size=0, + nucleicacid_min_size=0) + self.assertTrue(pdbized.chains[0].IsPolypeptide()) + self.assertTrue(pdbized.chains[1].IsPolynucleotide()) + self.assertEqual(pdbized.chains[2].residues[0].GetStringProp("type"), + "oligosaccharide") + + # test to store a short polynucleotide and sugar in the ligand chain but keep + # longer polynucleotide and the peptide outside of the ligand chain. 
+ c = e.InsertChain("D"); + e.SetChainType(c, mol.CHAINTYPE_POLY_DN) + for i in range(5): + e.AppendResidue(c, "DG") + seqs.AddSequence(seq.CreateSequence("LotsOfGuanine", "GGGGG")) + pdbized = self._CheckMinSize(m, seqs, ["A", "B", "_"], + saccharide_min_size=11, + peptide_min_size=0, + nucleicacid_min_size=6) + self.assertTrue(pdbized.chains[0].IsPolypeptide()) + self.assertTrue(pdbized.chains[1].IsPolynucleotide()) + self.assertEqual(pdbized.chains[2].residues[0].GetStringProp("type"), + "oligosaccharide") + self.assertEqual(pdbized.chains[2].residues[-1].GetStringProp("type"), + "polydeoxyribonucleotide") + + # test to add a small peptide to the ligand chain + c = e.InsertChain("E"); + e.SetChainType(c, mol.CHAINTYPE_POLY_PEPTIDE_L) + for i in range(5): + e.AppendResidue(c, "ALA") + seqs.AddSequence(seq.CreateSequence("SomeAlanin", "AAAAA")) + pdbized = self._CheckMinSize(m, seqs, ["A", "B", "_", "C"], + saccharide_min_size=11, + peptide_min_size=6, + nucleicacid_min_size=3) + self.assertTrue(pdbized.chains[0].IsPolypeptide()) + self.assertTrue(pdbized.chains[1].IsPolynucleotide()) + self.assertEqual(pdbized.chains[2].residues[0].GetStringProp("type"), + "oligosaccharide") + self.assertEqual(pdbized.chains[2].residues[-1].GetStringProp("type"), + "polypeptide(L)") + + if __name__ == "__main__": from ost import testutils testutils.RunTests() diff --git a/modules/mol/base/doc/entity.rst b/modules/mol/base/doc/entity.rst index 3a4d0d292748f1c8f7546468d8f8b4657c9d10c2..aeb18459f28e01c33c3c09a48dd8d9b9ee342822 100644 --- a/modules/mol/base/doc/entity.rst +++ b/modules/mol/base/doc/entity.rst @@ -342,7 +342,94 @@ The Handle Classes A chain of one or more :class:`residues <ResidueHandle>`. Chains are always part of an entity. - + + .. attribute:: atoms + + Get list of all atoms of this chain. To access a single atom, use + :meth:`FindAtom`. + + This property is read-only. 
Also available as :meth:`GetAtomList` + + :type: :class:`AtomHandleList` (list of :class:`AtomHandle`) + + .. attribute:: bounds + + Axis-aligned bounding box of the chain. Read-only + + :type: :class:`ost.geom.AlignedCuboid` + + .. attribute:: center_of_atoms + + Center of atoms (not mass weighted). Also available as + :meth:`GetCenterOfAtoms`. + + :type: :class:`~ost.geom.Vec3` + + .. attribute:: center_of_mass + + Center of mass. Also available as :meth:`GetCenterOfMass` + + :type: :class:`~ost.geom.Vec3` + + .. attribute:: description + + Details about the chain. Not categorised, just text. + + .. attribute:: in_sequence + + Whether the residue numbers are in ascending order. For example: + + .. code-block:: python + + chain=ent.FindChain("A") + print(chain.residues) # [A.GLY1, A.GLY2, A.GLY4A, A.GLY4B] + print(chain.in_sequence) # prints true + + chain=ent.FindChain("B") + print(chain.residues) # [B.GLY1, B.GLY4, B.GLY3] + print(chain.in_sequence) # prints false + + .. attribute:: is_oligosaccharide + + Indicates if the chain is an oligosaccharide, a branched, non-linear entity + of multiple sugars. Also available as :meth:`IsOligosaccharide`. + + :type: :class:`bool` + + .. attribute:: is_polymer + + Indicates if a chain is a polymer. True for polypeptides, polynucleotides, + polysaccharides, oligosaccharides and branched chains. Also available as + :meth:`IsPolymer`. + + :type: :class:`bool` + + .. attribute:: is_polynucleotide + + Indicates if a chain is a nucleic acid. Also available as + :meth:`IsPolynucleotide`. + + :type: :class:`bool` + + .. attribute:: is_polypeptide + + Indicates if a chain is a protein. Also available as :meth:`IsPolypeptide`. + + :type: :class:`bool` + + .. attribute:: is_polysaccharide + + Indicates if a chain is a polysaccharide. Also available as + :meth:`IsPolysaccharide()`. + + :type: :class:`bool` + + .. attribute:: mass + + The total mass of this chain in Dalton. Also available as :meth:`GetMass` + + :type: float + .. 
attribute:: name The chain name. The name uniquely identifies the chain in the entity. In @@ -356,15 +443,11 @@ The Handle Classes :type: str - .. attribute:: type - - Describes the type of the chain. - - :type: :class:`ChainType`. + .. attribute:: residue_count - .. attribute:: description + Number of residues. Read-only. See :meth:`GetResidueCount`. - Details about the chain. Not categorised, just text. + :type: :class:`int` .. attribute:: residues @@ -387,59 +470,11 @@ The Handle Classes :type: :class:`ResidueHandleList` (list of :class:`ResidueHandle`) - .. attribute:: in_sequence - - Whether the residue numbers are in ascending order. For example: - - .. code-block:: python - - chain=ent.FindChain("A") - print(chain.residues) # [A.GLY1, A.GLY2, A.GLY4A, A.GLY4B] - print(chain.in_sequence) # prints true - - chain=ent.FindChain("B") - print(chain.residues) # [B.GLY1, B.GLY4, B.GLY3] - print(chain.in_sequence) # prints false - - .. attribute:: residue_count - - Number of residues. Read-only. See :meth:`GetResidueCount`. - - :type: :class:`int` - - .. attribute:: atoms - - Get list of all atoms of this chain. To access a single atom, use - :meth:`FindAtom`. - - This property is read-only. Also available as :meth:`GetAtomList` - - :type: :class:`AtomHandleList` (list of :class:`AtomHandle`) - - .. attribute:: bounds - - Axis-aligned bounding box of the chain. Read-only - - :type: :class:`ost.geom.AlignedCuboid` - - .. attribute:: mass - - The total mass of this chain in Dalton. Also available as :meth:`GetMass` - - :type: float + .. attribute:: type - .. attribute:: center_of_mass + Describes the type of the chain. - Center of mass. Also available as :meth:`GetCenterOfMass` - - :type: :class:`~ost.geom.Vec3` - - .. attribute:: center_of_atoms - - Center of atoms (not mass weighted). Also available as - :meth:`GetCenterOfAtoms`. - - :type: :class:`~ost.geom.Vec3` + :type: :class:`ChainType`. .. 
attribute:: valid @@ -491,6 +526,26 @@ The Handle Classes See :attr:`type` + .. method:: IsOligosaccharide() + + See :attr:`is_oligosaccharide` + + .. method:: IsPolymer() + + See :attr:`is_polymer` + + .. method:: IsPolynucleotide() + + See :attr:`is_polynucleotide` + + .. method:: IsPolypeptide() + + See :attr:`is_polypeptide` + + .. method:: IsPolysaccharide() + + See :attr:`is_polysaccharide` + .. method:: GetDescription() See :attr:`description` diff --git a/modules/mol/base/pymod/export_chain.cc b/modules/mol/base/pymod/export_chain.cc index 1664c13a33ed03c1fbdc404bbac9b081522c8e4a..b5ce6d91cba30b6a3dab3a83a066f044139e1d41 100644 --- a/modules/mol/base/pymod/export_chain.cc +++ b/modules/mol/base/pymod/export_chain.cc @@ -58,10 +58,12 @@ void export_Chain() .def("IsPolypeptide", &ChainBase::IsPolypeptide) .def("IsPolynucleotide", &ChainBase::IsPolynucleotide) .def("IsPolysaccharide", &ChainBase::IsPolysaccharide) + .def("IsOligosaccharide", &ChainBase::IsOligosaccharide) .def("IsPolymer", &ChainBase::IsPolymer) .add_property("is_polypeptide", &ChainBase::IsPolypeptide) .add_property("is_polynucleotide", &ChainBase::IsPolynucleotide) .add_property("is_polysaccharide", &ChainBase::IsPolysaccharide) + .add_property("is_oligosaccharide", &ChainBase::IsOligosaccharide) .add_property("is_polymer", &ChainBase::IsPolymer) .add_property("type", &ChainBase::GetType) .add_property("description", &ChainBase::GetDescription) diff --git a/modules/mol/base/src/chain_base.cc b/modules/mol/base/src/chain_base.cc index 4114a1462be137b7ea2e84cac6618df2309849e3..2b693b7fc031d07bf3da2492a1644db649557b30 100644 --- a/modules/mol/base/src/chain_base.cc +++ b/modules/mol/base/src/chain_base.cc @@ -81,6 +81,13 @@ bool ChainBase::IsPolysaccharide() const } +bool ChainBase::IsOligosaccharide() const +{ + this->CheckValidity(); + return impl_->IsOligosaccharide(); + +} + bool ChainBase::IsPolypeptide() const { this->CheckValidity(); diff --git a/modules/mol/base/src/chain_base.hh 
b/modules/mol/base/src/chain_base.hh index 5463ef0a8627eac578b12e801618da69bc74c898..0ab82a3dbaf784193d3ba09fe1e687af4f96eaeb 100644 --- a/modules/mol/base/src/chain_base.hh +++ b/modules/mol/base/src/chain_base.hh @@ -82,7 +82,10 @@ public: /// \brief whether the chain is a polysaccharide bool IsPolysaccharide() const; - + + /// \brief whether the chain is an oligosaccharide (branched mmCIF entity) + bool IsOligosaccharide() const; + /// \brief whether the chain is a polypeptide bool IsPolypeptide() const; diff --git a/modules/mol/base/src/impl/chain_impl.hh b/modules/mol/base/src/impl/chain_impl.hh index a3a7a97dcec720f024c39a2bcf73ff0bc108e9b6..e5bc7aac338d1d3c82c81730b9b5709eba7cf569 100644 --- a/modules/mol/base/src/impl/chain_impl.hh +++ b/modules/mol/base/src/impl/chain_impl.hh @@ -69,14 +69,20 @@ public: { return type_==CHAINTYPE_POLY || this->IsPolypeptide() || this->IsPolynucleotide() || this->IsPolysaccharide() || - type_==CHAINTYPE_POLY_PEPTIDE_DN_RN || - type_==CHAINTYPE_OLIGOSACCHARIDE; + this->IsOligosaccharide() || + type_==CHAINTYPE_POLY_PEPTIDE_DN_RN || type_==CHAINTYPE_BRANCHED; } /// \brief whether the chain is a polysaccharide bool IsPolysaccharide() const { return type_==CHAINTYPE_POLY_SAC_D || type_==CHAINTYPE_POLY_SAC_L; } + + /// \brief whether the chain is an oligosaccharide + bool IsOligosaccharide() const + { + return type_==CHAINTYPE_OLIGOSACCHARIDE; + } /// \brief whether the chain is a polypeptide bool IsPolypeptide() const { diff --git a/modules/mol/base/src/transfer_connectivity.cc b/modules/mol/base/src/transfer_connectivity.cc index b7536f4871d261f5aa5560217d60a1af1bb0e07a..448fde12e4af18822210a6d371d7979a2a289540 100644 --- a/modules/mol/base/src/transfer_connectivity.cc +++ b/modules/mol/base/src/transfer_connectivity.cc @@ -73,6 +73,32 @@ public: return false; } + bool CheckInsertionCode(ResidueHandle chk_res, ResidueHandle src_res, + ResidueHandle dst_res) { + // This is a hack to make GetDestAtomForSrcAtom work with 
mol::alg::PDBize, + // maybe with insertion codes in general... + // Depending on the value of min_polymer_size, PDBize puts small polymers in + // the ligand chain called '_'. The original chain is annotated by all + // residues having the same residue number but different insertion codes. + // That can lead to the same issue described further down for branched + // entities. Basically the problem is that in branched entities the residues + // do not need to be connected with their immediate neighbours breaking the + // original mechanics of GetDestAtomForSrcAtom. + // check if residue has an insertion code + if (chk_res.GetNumber().GetInsCode() != '\0') { + // check if the original residue also had no inscode + if (src_res.GetNumber().GetInsCode() == '\0') { + // There must be another residue with the same number and since + // this function is only used by PDBize, that residue originates + // from the same chain as dst_res and may be the correct partner. + if (chk_res.GetNumber().GetNum() != dst_res.GetNumber().GetNum()) { + return false; + } + } + } + return true; + } + AtomHandle GetDestAtomForSrcAtom(AtomHandle src_atom, ResidueHandle src_res, ResidueHandle dst_res, const std::map<String, AtomHandle>& name_to_atom) { @@ -87,19 +113,61 @@ public: j = to_from_->find(dst_res.GetPrev()); if (j != to_from_->end()) { if (j->second == r) { - return j->first.FindAtom(src_atom.GetName()); + if (CheckInsertionCode( + j->first.FindAtom(src_atom.GetName()).GetResidue(), + r, dst_res)) { + return j->first.FindAtom(src_atom.GetName()); + } } } j = to_from_->find(dst_res.GetNext()); if (j != to_from_->end()) { if (j->second == r) { - return j->first.FindAtom(src_atom.GetName()); + if (CheckInsertionCode( + j->first.FindAtom(src_atom.GetName()).GetResidue(), + r, dst_res)) { + return j->first.FindAtom(src_atom.GetName()); + } } } // still nothing. scan linearly through all residues. 
for ( j = to_from_->begin(); j != to_from_->end(); ++j) { if (j->second == r) { - return j->first.FindAtom(src_atom.GetName()); + // Check that we are connecting to the same chain, otherwise we need to + // check that the found residue also connects outwards. + if (j->first.GetChain() == dst_res.GetChain()) { + if (CheckInsertionCode( + j->first.FindAtom(src_atom.GetName()).GetResidue(), + r, dst_res)) { + return j->first.FindAtom(src_atom.GetName()); + } + } + // Using the found residue would mean connecting to a different chain + // which is unusual. So we make sure that in the residue we transfer + // from, there is also a bond to a different chain. Otherwise the + // search continues. + // For branched mmCIF entities where connected residues may be not + // direct neighbours since branched entities are non-linear, that gave + // a problem when a bio unit doubled an oligosaccharide and the search + // found the bond of the copied oligosaccharide first. In that case the + // copied oligosaccharide is a different chain and the residue from the + // same chain comes later in the list. 
+ BondHandleList jbonds = src_atom.GetBondList(); + for (BondHandleList::iterator k = jbonds.begin(), e2 = jbonds.end(); + k !=e2; ++k) { + // determine which "side" the src_res sits on + if (k->GetFirst() == src_atom) { + if (src_atom.GetResidue().GetChain() + != k->GetSecond().GetResidue().GetChain()) { + return j->first.FindAtom(src_atom.GetName()); + } + continue; + } + if (src_atom.GetResidue().GetChain() + != k->GetFirst().GetResidue().GetChain()) { + return j->first.FindAtom(src_atom.GetName()); + } + } } } return AtomHandle(); @@ -117,3 +185,5 @@ void TransferConnectivity(EntityHandle dest, } }} + +// LocalWords: PDBize ligand diff --git a/modules/mol/base/tests/test_chain.cc b/modules/mol/base/tests/test_chain.cc index 2154bdeab94555e73c89892847e6f9cac1759af4..f965fced69eb87d610baec27f4830e8d82202369 100644 --- a/modules/mol/base/tests/test_chain.cc +++ b/modules/mol/base/tests/test_chain.cc @@ -235,59 +235,69 @@ BOOST_AUTO_TEST_CASE(chain_type) BOOST_CHECK(ch1.GetType() == CHAINTYPE_UNKNOWN); BOOST_CHECK(!ch1.IsPolymer()); BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsOligosaccharide()); BOOST_CHECK(!ch1.IsPolypeptide()); BOOST_CHECK(!ch1.IsPolynucleotide()); e.SetChainType(ch1, CHAINTYPE_POLY); BOOST_CHECK(ch1.GetType() == CHAINTYPE_POLY); BOOST_CHECK(ch1.IsPolymer()); BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsOligosaccharide()); BOOST_CHECK(!ch1.IsPolypeptide()); BOOST_CHECK(!ch1.IsPolynucleotide()); e.SetChainType(ch1, CHAINTYPE_NON_POLY); BOOST_CHECK(ch1.GetType() == CHAINTYPE_NON_POLY); BOOST_CHECK(!ch1.IsPolymer()); BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsOligosaccharide()); BOOST_CHECK(!ch1.IsPolypeptide()); BOOST_CHECK(!ch1.IsPolynucleotide()); e.SetChainType(ch1, CHAINTYPE_WATER); BOOST_CHECK(ch1.GetType() == CHAINTYPE_WATER); BOOST_CHECK(!ch1.IsPolymer()); BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsOligosaccharide()); BOOST_CHECK(!ch1.IsPolypeptide()); 
BOOST_CHECK(!ch1.IsPolynucleotide()); e.SetChainType(ch1, CHAINTYPE_POLY_PEPTIDE_D); BOOST_CHECK(ch1.IsPolymer()); BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsOligosaccharide()); BOOST_CHECK(ch1.IsPolypeptide()); BOOST_CHECK(!ch1.IsPolynucleotide()); BOOST_CHECK(ch1.GetType() == CHAINTYPE_POLY_PEPTIDE_D); e.SetChainType(ch1, CHAINTYPE_POLY_PEPTIDE_L); BOOST_CHECK(ch1.IsPolymer()); BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsOligosaccharide()); BOOST_CHECK(ch1.IsPolypeptide()); BOOST_CHECK(!ch1.IsPolynucleotide()); BOOST_CHECK(ch1.GetType() == CHAINTYPE_POLY_PEPTIDE_L); e.SetChainType(ch1, CHAINTYPE_POLY_DN); BOOST_CHECK(ch1.IsPolymer()); BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsOligosaccharide()); BOOST_CHECK(!ch1.IsPolypeptide()); BOOST_CHECK(ch1.IsPolynucleotide()); BOOST_CHECK(ch1.GetType() == CHAINTYPE_POLY_DN); e.SetChainType(ch1, CHAINTYPE_POLY_RN); BOOST_CHECK(ch1.IsPolymer()); BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsOligosaccharide()); BOOST_CHECK(!ch1.IsPolypeptide()); BOOST_CHECK(ch1.IsPolynucleotide()); BOOST_CHECK(ch1.GetType() == CHAINTYPE_POLY_RN); e.SetChainType(ch1, CHAINTYPE_POLY_SAC_D); BOOST_CHECK(ch1.IsPolymer()); BOOST_CHECK(ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsOligosaccharide()); BOOST_CHECK(!ch1.IsPolypeptide()); BOOST_CHECK(!ch1.IsPolynucleotide()); BOOST_CHECK(ch1.GetType() == CHAINTYPE_POLY_SAC_D); e.SetChainType(ch1, CHAINTYPE_POLY_SAC_L); BOOST_CHECK(ch1.IsPolymer()); BOOST_CHECK(ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsOligosaccharide()); BOOST_CHECK(!ch1.IsPolypeptide()); BOOST_CHECK(!ch1.IsPolynucleotide()); BOOST_CHECK(ch1.GetType() == CHAINTYPE_POLY_SAC_L); @@ -295,6 +305,7 @@ BOOST_AUTO_TEST_CASE(chain_type) BOOST_CHECK(ch1.GetType() == CHAINTYPE_POLY_DN_RN); BOOST_CHECK(ch1.IsPolymer()); BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsOligosaccharide()); BOOST_CHECK(!ch1.IsPolypeptide()); BOOST_CHECK(ch1.IsPolynucleotide()); 
e.SetChainType(ch1, CHAINTYPE_N_CHAINTYPES); @@ -305,30 +316,35 @@ BOOST_AUTO_TEST_CASE(chain_type) BOOST_CHECK(ch1.GetType() == CHAINTYPE_MACROLIDE); BOOST_CHECK(!ch1.IsPolymer()); BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsOligosaccharide()); BOOST_CHECK(!ch1.IsPolypeptide()); BOOST_CHECK(!ch1.IsPolynucleotide()); e.SetChainType(ch1, CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE); BOOST_CHECK(ch1.GetType() == CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE); BOOST_CHECK(ch1.IsPolymer()); BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsOligosaccharide()); BOOST_CHECK(ch1.IsPolypeptide()); BOOST_CHECK(!ch1.IsPolynucleotide()); e.SetChainType(ch1, CHAINTYPE_POLY_PEPTIDE_DN_RN); BOOST_CHECK(ch1.GetType() == CHAINTYPE_POLY_PEPTIDE_DN_RN); BOOST_CHECK(ch1.IsPolymer()); BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsOligosaccharide()); BOOST_CHECK(!ch1.IsPolypeptide()); BOOST_CHECK(!ch1.IsPolynucleotide()); e.SetChainType(ch1, CHAINTYPE_BRANCHED); BOOST_CHECK(ch1.GetType() == CHAINTYPE_BRANCHED); - BOOST_CHECK(!ch1.IsPolymer()); + BOOST_CHECK(ch1.IsPolymer()); BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsOligosaccharide()); BOOST_CHECK(!ch1.IsPolypeptide()); BOOST_CHECK(!ch1.IsPolynucleotide()); e.SetChainType(ch1, CHAINTYPE_OLIGOSACCHARIDE); BOOST_CHECK(ch1.GetType() == CHAINTYPE_OLIGOSACCHARIDE); BOOST_CHECK(ch1.IsPolymer()); BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(ch1.IsOligosaccharide()); BOOST_CHECK(!ch1.IsPolypeptide()); BOOST_CHECK(!ch1.IsPolynucleotide()); diff --git a/modules/mol/base/tests/test_transfer_connectivity.cc b/modules/mol/base/tests/test_transfer_connectivity.cc index 36ad6dc2cd5c1a0060b594ef28fa4119dfdb904a..e97be09fd66aff1a680316e978aa37a110024809 100644 --- a/modules/mol/base/tests/test_transfer_connectivity.cc +++ b/modules/mol/base/tests/test_transfer_connectivity.cc @@ -72,6 +72,150 @@ BOOST_AUTO_TEST_CASE(test_transfer_conn) BOOST_CHECK_EQUAL(12, ent2.GetBondCount()); } 
+BOOST_AUTO_TEST_CASE(test_transfer_two_from_one_chain) +{ + // Test that if, like by PDBize, a single chain is used to connect two chains, + // all the bonds stay in the right chain. + EntityHandle src = CreateEntity(); + XCSEditor edi = src.EditXCS(); + ChainHandle fst_chain = edi.InsertChain(String("A")); + ResidueHandle bma1 = edi.AppendResidue(fst_chain, "BMA"); + ResidueHandle man1 = edi.AppendResidue(fst_chain, "MAN"); + ResidueHandle man2 = edi.AppendResidue(fst_chain, "MAN"); + + AtomHandle bma_o3; + AtomHandle bma_o6; + AtomHandle man_c1; + + bma_o3 = edi.InsertAtom(bma1, "O3", geom::Vec3(9.646, -29.415, 33.307)); + bma_o6 = edi.InsertAtom(bma1, "O6", geom::Vec3(8.642, -28.695, 27.405)); + + man_c1 = edi.InsertAtom(man1, "C1", geom::Vec3(10.253, -30.563, 33.946)); + edi.Connect(bma_o3, man_c1); + + man_c1 = edi.InsertAtom(man2, "C1", geom::Vec3(8.091, -29.369, 26.233)); + edi.Connect(bma_o6, man_c1); + + EntityHandle dst = CreateEntity(); + edi = dst.EditXCS(); + fst_chain = edi.InsertChain(String("B")); + bma1 = edi.AppendResidue(fst_chain, "BMA"); + man1 = edi.AppendResidue(fst_chain, "MAN"); + man2 = edi.AppendResidue(fst_chain, "MAN"); + + edi.InsertAtom(bma1, "O3", geom::Vec3(9.646, -29.415, 33.307)); + edi.InsertAtom(bma1, "O6", geom::Vec3(8.642, -28.695, 27.405)); + + edi.InsertAtom(man1, "C1", geom::Vec3(10.253, -30.563, 33.946)); + + edi.InsertAtom(man2, "C1", geom::Vec3(8.091, -29.369, 26.233)); + + ChainHandle snd_chain = edi.InsertChain(String("C")); + bma1 = edi.AppendResidue(snd_chain, "BMA"); + man1 = edi.AppendResidue(snd_chain, "MAN"); + man2 = edi.AppendResidue(snd_chain, "MAN"); + + edi.InsertAtom(bma1, "O3", geom::Vec3(-9.646, 29.415, 33.307)); + edi.InsertAtom(bma1, "O6", geom::Vec3(-8.642, 28.695, 27.405)); + + edi.InsertAtom(man1, "C1", geom::Vec3(-10.253, 30.563, 33.946)); + + edi.InsertAtom(man2, "C1", geom::Vec3(-8.091, 29.369, 26.233)); + + std::map<ResidueHandle, ResidueHandle> to_from; + ResidueHandleList r1 = 
src.GetResidueList(); + ResidueHandleList r2 = fst_chain.GetResidueList(); + for (size_t i = 0; i < r1.size(); ++i) { + to_from[r2[i]] = r1[i]; + } + r2 = snd_chain.GetResidueList(); + for (size_t i = 0; i < r1.size(); ++i) { + to_from[r2[i]] = r1[i]; + } + + TransferConnectivity(dst, to_from); + BOOST_CHECK(BondExists(dst.FindAtom("B", 1, "O3"), + dst.FindAtom("B", 2, "C1"))); + BOOST_CHECK(BondExists(dst.FindAtom("B", 1, "O6"), + dst.FindAtom("B", 3, "C1"))); + BOOST_CHECK(BondExists(dst.FindAtom("C", 1, "O3"), + dst.FindAtom("C", 2, "C1"))); + BOOST_CHECK(BondExists(dst.FindAtom("C", 1, "O6"), + dst.FindAtom("C", 3, "C1"))); + BOOST_CHECK_EQUAL(BondExists(dst.FindAtom("C", 1, "O6"), + dst.FindAtom("B", 3, "C1")), false); + BOOST_CHECK_EQUAL(BondExists(dst.FindAtom("B", 1, "O6"), + dst.FindAtom("C", 3, "C1")), false); +} + +BOOST_AUTO_TEST_CASE(test_transfer_two_combined_one_chain) +{ + // Test that if, like by PDBize, a single chain ends up two times in '_', the + // connectivity does not get messy. 
+ EntityHandle src = CreateEntity(); + XCSEditor edi = src.EditXCS(); + ChainHandle fst_chain = edi.InsertChain(String("A")); + ResidueHandle bma1 = edi.AppendResidue(fst_chain, "BMA"); + ResidueHandle man1 = edi.AppendResidue(fst_chain, "MAN"); + ResidueHandle man2 = edi.AppendResidue(fst_chain, "MAN"); + + AtomHandle bma_o3; + AtomHandle bma_o6; + AtomHandle man_c1; + + bma_o3 = edi.InsertAtom(bma1, "O3", geom::Vec3(9.646, -29.415, 33.307)); + bma_o6 = edi.InsertAtom(bma1, "O6", geom::Vec3(8.642, -28.695, 27.405)); + + man_c1 = edi.InsertAtom(man1, "C1", geom::Vec3(10.253, -30.563, 33.946)); + edi.Connect(bma_o3, man_c1); + + man_c1 = edi.InsertAtom(man2, "C1", geom::Vec3(8.091, -29.369, 26.233)); + edi.Connect(bma_o6, man_c1); + + EntityHandle dst = CreateEntity(); + edi = dst.EditXCS(); + fst_chain = edi.InsertChain(String("_")); + bma1 = edi.AppendResidue(fst_chain, "BMA", ResNum(1, 'A')); + man1 = edi.AppendResidue(fst_chain, "MAN", ResNum(1, 'B')); + man2 = edi.AppendResidue(fst_chain, "MAN", ResNum(1, 'C')); + + edi.InsertAtom(bma1, "O3", geom::Vec3(9.646, -29.415, 33.307)); + edi.InsertAtom(bma1, "O6", geom::Vec3(8.642, -28.695, 27.405)); + edi.InsertAtom(man1, "C1", geom::Vec3(10.253, -30.563, 33.946)); + edi.InsertAtom(man2, "C1", geom::Vec3(8.091, -29.369, 26.233)); + + bma1 = edi.AppendResidue(fst_chain, "BMA", ResNum(2, 'A')); + man1 = edi.AppendResidue(fst_chain, "MAN", ResNum(2, 'B')); + man2 = edi.AppendResidue(fst_chain, "MAN", ResNum(2, 'C')); + + edi.InsertAtom(bma1, "O3", geom::Vec3(-9.646, 29.415, 33.307)); + edi.InsertAtom(bma1, "O6", geom::Vec3(-8.642, 28.695, 27.405)); + edi.InsertAtom(man1, "C1", geom::Vec3(-10.253, 30.563, 33.946)); + edi.InsertAtom(man2, "C1", geom::Vec3(-8.091, 29.369, 26.233)); + + std::map<ResidueHandle, ResidueHandle> to_from; + ResidueHandleList r1 = src.GetResidueList(); + ResidueHandleList r2 = fst_chain.GetResidueList(); + for (size_t i = 0; i < r1.size(); ++i) { + to_from[r2[i]] = r1[i]; + } + for (size_t i = 0; 
i < r1.size(); ++i) { + to_from[r2[i+3]] = r1[i]; + } + + TransferConnectivity(dst, to_from); + + BOOST_CHECK(BondExists(dst.FindAtom("_", ResNum(1, 'A'), "O3"), + dst.FindAtom("_", ResNum(1, 'B'), "C1"))); + BOOST_CHECK(BondExists(dst.FindAtom("_", ResNum(1, 'A'), "O6"), + dst.FindAtom("_", ResNum(1, 'C'), "C1"))); + BOOST_CHECK(BondExists(dst.FindAtom("_", ResNum(2, 'A'), "O3"), + dst.FindAtom("_", ResNum(2, 'B'), "C1"))); + BOOST_CHECK(BondExists(dst.FindAtom("_", ResNum(2, 'A'), "O6"), + dst.FindAtom("_", ResNum(2, 'C'), "C1"))); + BOOST_CHECK_EQUAL(BondExists(dst.FindAtom("_", ResNum(2, 'A'), "O6"), + dst.FindAtom("_", ResNum(1, 'C'), "C1")), false); +} BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/seq/alg/pymod/__init__.py b/modules/seq/alg/pymod/__init__.py index 2e9c5fcffca4375afb80a13cea941658f62c508a..3be1b5845184ad08c6284694a9410427909888dd 100644 --- a/modules/seq/alg/pymod/__init__.py +++ b/modules/seq/alg/pymod/__init__.py @@ -100,16 +100,29 @@ def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True): return seq.CreateAlignment() if try_resnum_first: aln_seq = seq.CreateSequence('atoms', '-'*len(seqres)) + aligned_resnums = set() for r1 in residues: + if r1.number.num in aligned_resnums: + LogWarning('Residue numbers must be unique. 
Already observed %i, ' \ + 'cannot align %s anymore.'%(r1.number.num, r1.qualified_name)) + try_resnum_first = False + break if r1.number.num <= len(seqres) and r1.number.num > 0: if IsEqual(seqres[r1.number.num - 1], r1.one_letter_code): aln_seq[r1.number.num - 1] = r1.one_letter_code + aligned_resnums.add(r1.number.num) else: LogWarning('Sequence mismatch: chain has "' + r1.one_letter_code + '", while SEQRES is "' + seqres[r1.number.num - 1] + '" at the corresponding position.') try_resnum_first = False break + else: + warning = 'Residue with number %i is outside of the range covered by '\ + 'SEQRES [1, %i]'%(r1.number.num, len(seqres)) + LogWarning(warning) + try_resnum_first = False + break if not try_resnum_first: fragments=[residues[0].one_letter_code] for r1, r2 in zip(residues[:-1], residues[1:]): diff --git a/singularity/Singularity b/singularity/Singularity index 9e445f865b89d6e3e1501eb2847720ca868b8005..9aa5176634186bfea200cc4fb2efaa60b3700ad2 100644 --- a/singularity/Singularity +++ b/singularity/Singularity @@ -1,6 +1,5 @@ BootStrap: docker -From: registry.scicore.unibas.ch/schwede/openstructure:2.0.0-bionic - +From: registry.scicore.unibas.ch/schwede/openstructure:2.1.0-bionic %post ############################################################################## # POST