diff --git a/doc/make.py b/doc/make.py index f928ba1c40b86258cefaead4be140d685a6d97c8..70bac2a58ca7b978cb9fe325b9b54563491edd8c 100644 --- a/doc/make.py +++ b/doc/make.py @@ -31,7 +31,8 @@ def _RequireCopy(in_name, out_name): pattern = re.compile(r'\.\.\s+image\:\:\s+([a-zA-Z0-9_\-//]+\.png|[a-zA-Z0-9_\-//]+\.jpg)') def _CheckImage(in_name): - file = open(in_name, "r") + file = open(in_name, "r", encoding='utf8') + print("IN", in_name) text = file.read() picture_list = pattern.findall(text) file.close() diff --git a/modules/conop/doc/connectivity.rst b/modules/conop/doc/connectivity.rst index 8686790cc2398668466b08abc2d30cd004ba326c..2f0f23ccedce014e33416c5ab3990732bbc10a40 100644 --- a/modules/conop/doc/connectivity.rst +++ b/modules/conop/doc/connectivity.rst @@ -9,7 +9,7 @@ Motivation The connectivity of atoms is notoriously difficult to come by for biological -macromolecules. PDB files, the de-factor standard exchange format for structural +macromolecules. PDB files, the de facto standard exchange format for structural information allows bonds to be specified in CONECT records. However, they are not mandatory. Many programs, especially the ones not requiring on connectivity of atoms, do not write CONECT records. As a result, programs and structural biology diff --git a/modules/doc/install.rst b/modules/doc/install.rst index 37af4de496f075f6b401cecb49406de907085111..7eb570ca1165c7a26060140bb08386a797241158 100644 --- a/modules/doc/install.rst +++ b/modules/doc/install.rst @@ -324,14 +324,24 @@ version of OpenStructure. Be careful at -DPYTHON_LIBRARIES, Debian 10 comes with Python 3.7 so that needs to be substituted. - + + **macOS (Catalina) with Homebrew** +.. note:: + + When switching the Qt version used to compile OST with support for the + graphical user interface, dng may start behaving weird. Symptoms are that the + user interface starts being unresponsive to mouse clicks. 
An easy solution + may be to close dng and remove + $HOME/Library/Preferences/org.openstructure.dng.plist and start dng again. + `Homebrew <https://brew.sh/>`_ can be used to conveniently install all dependencies. The current Python version, as of writing these instructions, is -3.7.6 but works so far. Boost comes as 1.72.0 which seems to be OK. Do not -forget to also install `boost-python3`. Eigen and SQLite also seem to be -unproblematic concerning higher version numbers. +3.8.3 but works so far. Boost comes as 1.72.0 which seems to be OK. Do not +forget to also install `boost-python3` (your system may have a lower version of +Python than 3.8.3 but it seems like `boost-python` was compiled for 3.8.3). +Eigen and SQLite also seem to be unproblematic concerning higher version numbers. If you want to build the info module or the graphical user interface, make sure you have the Xcode app installed. Just the Xcode command line tools which are @@ -360,15 +370,15 @@ C flags: .. code-block:: bash - cmake . -DPYTHON_INCLUDE_PATH=/usr/local/Cellar/python3/3.7.6_1/\ - Frameworks/Python.framework/Versions/Current/include/python3.7m \ - -DPYTHON_LIBRARIES=/usr/local/Cellar/python3/3.7.6_1/\ - Frameworks/Python.framework/Versions/Current/lib/libpython3.7m.dylib \ - -DPYTHON_ROOT=/usr/local/ \ - -DBOOST_ROOT=/usr/local \ - -DSYS_ROOT=/usr/local \ - -DOPTIMIZE=ON \ - -DCMAKE_C_FLAGS="-isystem /Applications/Xcode.app/Contents/\ + cmake . 
-DPYTHON_INCLUDE_PATH=/usr/local/opt/python@3.8/Frameworks/\ + Python.framework/Versions/Current/include/python3.8/ \ + -DPYTHON_LIBRARIES=/usr/local/opt/python@3.8/Frameworks/\ + Python.framework/Versions/Current/lib/libpython3.8.dylib \ + -DPYTHON_ROOT=/usr/local/opt/python@3.8/ \ + -DBOOST_ROOT=/usr/local \ + -DSYS_ROOT=/usr/local \ + -DOPTIMIZE=ON \ + -DCMAKE_C_FLAGS="-isystem /Applications/Xcode.app/Contents/\ Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/\ Library/Frameworks/OpenGL.framework/Headers/ -isystem /usr/local/opt/\ qt/lib/QtCore.framework/Headers/ -isystem /usr/local/opt/qt/lib/\ @@ -376,7 +386,7 @@ C flags: Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/\ MacOSX.sdk/System/Library/Frameworks/Security.framework/ \ -isystem /usr/local/opt/qt/lib/QtGui.framework/Headers/" \ - -DCMAKE_CXX_FLAGS="-isystem /Applications/Xcode.app/Contents/\ + -DCMAKE_CXX_FLAGS="-isystem /Applications/Xcode.app/Contents/\ Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/\ Library/Frameworks/OpenGL.framework/Headers/ -isystem /usr/local/opt/\ qt/lib/QtCore.framework/Headers/ -isystem /usr/local/opt/qt/lib/\ diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index 0572ce2b9f34d6dd332b131273cd7460cfb80cca..c53a8b858d090bd3233170164482d40d50cbf1f9 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -3,14 +3,14 @@ mmCIF File Format .. currentmodule:: ost.io -The mmCIF file format is an alternate container for structural entities, also -provided by the PDB. Here we describe how to load those files and how to deal -with information provided above the common PDB format (:class:`MMCifInfo`, +The mmCIF file format is a container for structural entities provided by the +PDB. 
Here we describe how to load those files and how to deal with information +provided above the legacy PDB format (:class:`MMCifInfo`, :class:`MMCifInfoCitation`, :class:`MMCifInfoTransOp`, :class:`MMCifInfoBioUnit`, :class:`MMCifInfoStructDetails`, :class:`MMCifInfoObsolete`, :class:`MMCifInfoStructRef`, :class:`MMCifInfoStructRefSeq`, :class:`MMCifInfoStructRefSeqDif`, -:class:`MMCifInfoRevisions`). +:class:`MMCifInfoRevisions`, :class:`MMCifInfoEntityBranchLink`). Loading mmCIF Files @@ -50,6 +50,10 @@ The following categories of a mmCIF file are considered by the reader: :class:`MMCifInfoRevisions` * ``pdbx_audit_revision_history`` and ``pdbx_audit_revision_details`` (mmCIF dictionary version >= 5) used to fill :class:`MMCifInfoRevisions` +* ``pdbx_entity_branch`` and ``pdbx_entity_branch_link`` used for + :class:`MMCifInfoEntityBranchLink`, a list of links is available by + :meth:`~MMCifInfo.GetEntityBranchLinks` and + :meth:`~MMCifInfo.GetEntityBranchByChain` Notes: @@ -303,6 +307,55 @@ of the annotation available. See :attr:`obsolete` + .. method:: GetEntityBranchLinks() + + Get bond information for branched entities. Returns all + :class:`MMCifInfoEntityBranchLink` objects in one list. Chain and residue + information is available by the stored + :class:`AtomHandles <ost.mol.AtomHandle>` of each entry. + + :returns: :class:`list` of :class:`MMCifInfoEntityBranchLink` + + .. method:: GetEntityBranchByChain(chain_name) + + Get bond information for chains with branched entities. Returns all + :class:`MMCifInfoEntityBranchLink` objects in one list if chain is a + branched entity, an empty list otherwise. + + :param chain_name: Chain name to check for branch links + :type chain_name: :class:`str` + :returns: :class:`list` of :class:`MMCifInfoEntityBranchLink` + + .. method:: AddEntityBranchLink(chain_name, atom1, atom2, bond_order) + + Add bond information for a branched entity. 
+ + :param chain_name: Chain the bond belongs to + :type chain_name: :class:`str` + :param atom1: First atom of the bond + :type atom1: :class:`~ost.mol.AtomHandle` + :param atom2: Second atom of the bond + :type atom2: :class:`~ost.mol.AtomHandle` + :param bond_order: Bond order (e.g. 1=single, 2=double, 3=triple) + :type bond_order: :class:`int` + :returns: Nothing + + .. method:: GetEntityBranchChainNames + + Get a list of chain names which contain branched entities. + + :returns: :class:`list` of :class:`str` + + .. method:: GetEntityBranchChains + + Get a list of chains which contain branched entities. + + :returns: :class:`list` of :class:`~ost.mol.ChainHandle` + + .. method:: ConnectBranchLinks + + Establish all bonds stored for branched entities. + .. class:: MMCifInfoCitation This stores citation information from an input file. @@ -1175,15 +1228,84 @@ of the annotation available. See :attr:`first_release` +.. class:: MMCifInfoEntityBranchLink + + Data from ``pdbx_entity_branch``, most specifically + ``pdbx_entity_branch_link``. That is connectivity information for branched + entities, e.g. carbohydrates/oligosaccharides. + :class:`Conop Processors <ost.conop.Processor>` cannot easily connect them so + we use this information in :func:`LoadMMCIF` to do that. + + .. attribute:: atom1 + + The first atom of the bond. Corresponds to ``pdbx_entity_branch_link.atom_id_1``, + ``pdbx_entity_branch_link.comp_id_1`` and + ``pdbx_entity_branch_link.entity_branch_list_num_1``. Also available via + :meth:`GetAtom1` and :meth:`SetAtom1`. + + :type: :class:`~ost.mol.AtomHandle` + + .. attribute:: atom2 + + The second atom of the bond. Corresponds to ``pdbx_entity_branch_link.atom_id_2``, + ``pdbx_entity_branch_link.comp_id_2`` and + ``pdbx_entity_branch_link.entity_branch_list_num_2``. Also available via + :meth:`GetAtom2` and :meth:`SetAtom2`. + + :type: :class:`~ost.mol.AtomHandle` + + .. attribute:: bond_order + + Order of a bond (e.g. 1=single, 2=double, 3=triple). 
Corresponds to + ``pdbx_entity_branch_link.value_order``. Also available via :meth:`GetBondOrder` + and :meth:`SetBondOrder`. + + :type: :class:`int` + + .. method:: ConnectBranchLink(editor) + + Establish a bond between :attr:`atom1` and :attr:`atom2` of a + :class:`MMCifInfoEntityBranchLink`. + + :param editor: The editor instance to call for connecting the atoms. + :type editor: :class:`~ost.mol.XCSEditor` + :returns: Nothing + + .. method:: GetAtom1 + + See :attr:`atom1` + + .. method:: GetAtom2 + + See :attr:`atom2` + + .. method:: GetBondOrder + + See :attr:`bond_order` + + .. method:: SetAtom1 + + See :attr:`atom1` + + .. method:: SetAtom2 + + See :attr:`atom2` + + .. method:: SetBondOrder + + See :attr:`bond_order` + .. LocalWords: cas isbn pubmed asu seqres conop ConnectAll casp COMPND OBSLTE .. LocalWords: SPRSDE pdb func autofunction exptl attr pdbx oper conf spr dif .. LocalWords: biounits biounit uniprot UNP seqs AddMMCifPDBChainTr cif asym .. LocalWords: auth GetMMCifPDBChainTr AddPDBCMMCifhainTr GetPDBMMCifChainTr -.. LocalWords: GetRevisions AddRevision SetRevisionsDateOriginal GetSize +.. LocalWords: GetRevisions AddRevision SetRevisionsDateOriginal GetSize str .. LocalWords: GetNum num GetStatus GetLastDate GetFirstRelease storable -.. LocalWords: cas isbn pubmed asu seqres conop casp COMPND OBSLTE +.. LocalWords: cas isbn pubmed asu seqres conop casp COMPND OBSLTE LoadMMCIF .. LocalWords: SetChainList MMCifInfoTransOp ChainTypes MMCifInfoStructRef .. LocalWords: MMCifInfoRevisions bool difs MMCifInfoStructRefSeqDif rnum .. LocalWords: SetDateOriginal GetDateOriginal yyyy operationsintervalls -.. LocalWords: chainintervalls GetChainIntervalList GetMethodDetails -.. LocalWords: GetOperationsIntervalList SetMethodDetails +.. LocalWords: chainintervalls GetChainIntervalList GetMethodDetails GetAtom +.. LocalWords: GetOperationsIntervalList SetMethodDetails oligosaccharides +.. LocalWords: SetAtom GetBondOrder SetBondOrder MMCifInfoEntityBranchLink +.. 
LocalWords: GetEntityBranchByChain param diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index a71fe0d186936e0fe0114afec9180a469e3341a2..cf8063d011627d477fd7401d004d7b6664a621f7 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -277,9 +277,9 @@ def LoadCHARMMTraj(crd, dcd_file=None, profile='CHARMM', def LoadMMCIF(filename, fault_tolerant=None, calpha_only=None, profile='DEFAULT', remote=False, seqres=False, info=False): """ - Load MMCIF file from disk and return one or more entities. Several options - allow to customize the exact behaviour of the MMCIF import. For more - information on these options, see :doc:`profile`. + Load a mmCIF file and return one or more entities. Several options allow to + customize the exact behaviour of the mmCIF import. For more information on + these options, see :doc:`profile`. Residues are flagged as ligand if they are mentioned in a HET record. @@ -339,6 +339,7 @@ def LoadMMCIF(filename, fault_tolerant=None, calpha_only=None, profile='DEFAULT' reader.Parse() if prof.processor: prof.processor.Process(ent) + reader.info.ConnectBranchLinks() #else: # raise IOError("File doesn't contain any entities") if seqres and info: diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc index fa63ed2448bf4fad6dc88fb9751bb74ec0ebf9aa..11223672cc0e6fb8121cdc44a2ad58ede5c0211c 100644 --- a/modules/io/pymod/export_mmcif_io.cc +++ b/modules/io/pymod/export_mmcif_io.cc @@ -19,6 +19,7 @@ #include <boost/python.hpp> #include <boost/shared_ptr.hpp> #include <boost/python/suite/indexing/vector_indexing_suite.hpp> +#include <boost/python/suite/indexing/map_indexing_suite.hpp> using namespace boost::python; #include <ost/export_helper/pair_to_tuple_conv.hh> @@ -29,6 +30,20 @@ using namespace ost; using namespace ost::io; using namespace ost::mol; +template<typename T> +boost::python::list VecToList(std::vector<T>& vec){ + boost::python::list l; + for(typename 
std::vector<T>::iterator it=vec.begin();it!=vec.end();++it){ + l.append(*it); + } + return l; +} + +boost::python::list WrapGetNames(MMCifInfo *p){ + std::vector<String> names = p->GetEntityBranchChainNames(); + return VecToList<String>(names); +} + void export_mmcif_io() { class_<MMCifReader, boost::noncopyable>("MMCifReader", init<const String&, EntityHandle&, const IOProfile&>()) @@ -332,6 +347,36 @@ void export_mmcif_io() .add_property("first_release", &MMCifInfoRevisions::GetFirstRelease) ; + class_<MMCifInfoEntityBranchLink>("MMCifInfoEntityBranchLink", + init<mol::AtomHandle, + mol::AtomHandle, unsigned char>()) + .def("GetAtom1", &MMCifInfoEntityBranchLink::GetAtom1) + .def("GetAtom2", &MMCifInfoEntityBranchLink::GetAtom2) + .def("GetBondOrder", &MMCifInfoEntityBranchLink::GetBondOrder) + .def("ConnectBranchLink", &MMCifInfoEntityBranchLink::ConnectBranchLink) + .def("SetAtom1", &MMCifInfoEntityBranchLink::SetAtom1) + .def("SetAtom2", &MMCifInfoEntityBranchLink::SetAtom2) + .def("SetBondOrder", &MMCifInfoEntityBranchLink::SetBondOrder) + .def(self_ns::str(self)) + .add_property("atom1", &MMCifInfoEntityBranchLink::GetAtom1, + &MMCifInfoEntityBranchLink::SetAtom1) + .add_property("atom2", &MMCifInfoEntityBranchLink::GetAtom2, + &MMCifInfoEntityBranchLink::SetAtom2) + .add_property("bond_order", &MMCifInfoEntityBranchLink::GetBondOrder, + &MMCifInfoEntityBranchLink::SetBondOrder) + ; + + class_<MMCifInfoEntityBranchLinkMap>("MMCifInfoEntityBranchLinkMap", init<>()) + .def(map_indexing_suite<MMCifInfoEntityBranchLinkMap>()) + ; + + class_<std::vector<MMCifInfoEntityBranchLink> >( + "MMCifInfoEntityBranchLinkList", + init<>()) + .def(vector_indexing_suite<std::vector<MMCifInfoEntityBranchLink> >()) + .def(self_ns::str(self)) + ; + class_<MMCifInfo>("MMCifInfo", init<>()) .def("AddCitation", &MMCifInfo::AddCitation) .def("GetCitations", make_function(&MMCifInfo::GetCitations, @@ -366,6 +411,12 @@ void export_mmcif_io() (arg("num"), arg("date"), arg("status"), 
arg("major")=-1, arg("minor")=-1)) .def("GetRevisions", &MMCifInfo::GetRevisions) + .def("AddEntityBranchLink", &MMCifInfo::AddEntityBranchLink) + .def("GetEntityBranchLinks", &MMCifInfo::GetEntityBranchLinks) + .def("GetEntityBranchByChain", &MMCifInfo::GetEntityBranchByChain) + .def("ConnectBranchLinks", &MMCifInfo::ConnectBranchLinks) + .def("GetEntityBranchChainNames", &WrapGetNames) + .def("GetEntityBranchChains", &MMCifInfo::GetEntityBranchChains) .add_property("citations", make_function(&MMCifInfo::GetCitations, return_value_policy<copy_const_reference>())) .add_property("biounits", make_function(&MMCifInfo::GetBioUnits, diff --git a/modules/io/src/mol/chemdict_parser.cc b/modules/io/src/mol/chemdict_parser.cc index cf97f5e23d933c68a7fa6e5419027ae5b24cf534..9ff88e2f0a86ab236fc8074b59f392e8e92c931b 100644 --- a/modules/io/src/mol/chemdict_parser.cc +++ b/modules/io/src/mol/chemdict_parser.cc @@ -175,6 +175,9 @@ std::map<String, mol::ChemType> ChemdictParser::xtm_=std::map<String, mol::ChemT void ChemdictParser::InitTypeMap() { + // This list is manually kept up to date to include all types appearing in PDB + // See doc for how to create a compound library from scratch to test this + // https://openstructure.org/docs/conop/compoundlib/#creating-a-compound-library if (!tm_.empty()) return; tm_["L-PEPTIDE COOH CARBOXY TERMINUS"]=mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING); @@ -184,6 +187,7 @@ void ChemdictParser::InitTypeMap() tm_["D-PEPTIDE NH3 AMINO TERMINUS"]=mol::ChemClass(mol::ChemClass::D_PEPTIDE_LINKING); tm_["D-BETA-PEPTIDE, C-GAMMA LINKING"]=mol::ChemClass(mol::ChemClass::D_PEPTIDE_LINKING); tm_["D-GAMMA-PEPTIDE, C-DELTA LINKING"]=mol::ChemClass(mol::ChemClass::D_PEPTIDE_LINKING); + tm_["L-SACCHARIDE, ALPHA LINKING"]=mol::ChemClass(mol::ChemClass::L_SACCHARIDE); tm_["L-SACCHARIDE 1,4 AND 1,4 LINKING"]=mol::ChemClass(mol::ChemClass::L_SACCHARIDE); tm_["D-SACCHARIDE 1,4 AND 1,4 LINKING"]=mol::ChemClass(mol::ChemClass::D_SACCHARIDE); 
tm_["L-SACCHARIDE"]=mol::ChemClass(mol::ChemClass::L_SACCHARIDE); diff --git a/modules/io/src/mol/mmcif_info.cc b/modules/io/src/mol/mmcif_info.cc index 25c58ff9ced52ae52170239f132ccdc57c946674..ce93b67789ce168b1c77eaf04105da03967c1f63 100644 --- a/modules/io/src/mol/mmcif_info.cc +++ b/modules/io/src/mol/mmcif_info.cc @@ -195,4 +195,109 @@ MMCifInfoStructRefSeq::AddDif(int seq_rnum, const String& db_rnum, const String& return d; } +void MMCifInfo::AddEntityBranchLink(String chain_name, + mol::AtomHandle atom1, + mol::AtomHandle atom2, + unsigned char bond_order) +{ + // check if element already exists + MMCifInfoEntityBranchLinkMap::iterator blm_it = + entity_branches_.find(chain_name); + if (blm_it == entity_branches_.end()) { + // `find` points to the end of the map so chain_name was not found + std::pair<MMCifInfoEntityBranchLinkMap::iterator, bool> rit = + entity_branches_.insert(MMCifInfoEntityBranchLinkMap::value_type(chain_name, + std::vector<MMCifInfoEntityBranchLink>())); + // let blm_it point to the new element so we can add to the vector + blm_it = rit.first; + } + // add new branch element + blm_it->second.push_back(MMCifInfoEntityBranchLink(atom1, atom2, bond_order)); +} + +const std::vector<MMCifInfoEntityBranchLink> MMCifInfo::GetEntityBranchLinks() const +{ + std::vector<MMCifInfoEntityBranchLink> all_links; + MMCifInfoEntityBranchLinkMap::const_iterator blm_it; + for (blm_it = entity_branches_.begin(); + blm_it != entity_branches_.end(); ++blm_it) { + std::copy(blm_it->second.begin(), blm_it->second.end(), + std::back_inserter(all_links)); + } + return all_links; +} + +const std::vector<MMCifInfoEntityBranchLink> MMCifInfo::GetEntityBranchByChain( + const String& chain_name) const +{ + // search the requested chain + MMCifInfoEntityBranchLinkMap::const_iterator blm_it = + entity_branches_.find(chain_name); + if (blm_it != entity_branches_.end()) { + return blm_it->second; + } + return std::vector<MMCifInfoEntityBranchLink>(); +} + +const 
std::vector<String> MMCifInfo::GetEntityBranchChainNames() const +{ + std::vector<String> chain_names; + MMCifInfoEntityBranchLinkMap::const_iterator blm_it; + for (blm_it = entity_branches_.begin(); + blm_it != entity_branches_.end(); ++blm_it) { + chain_names.push_back(blm_it->first); + } + return chain_names; +} + +const mol::ChainHandleList MMCifInfo::GetEntityBranchChains() const +{ + std::vector<mol::ChainHandle> chains; + MMCifInfoEntityBranchLinkMap::const_iterator blm_it; + for (blm_it = entity_branches_.begin(); + blm_it != entity_branches_.end(); ++blm_it) { + chains.push_back(blm_it->second[0].GetAtom1().GetResidue().GetChain()); + } + + return chains; +} + +void MMCifInfo::ConnectBranchLinks() +{ + MMCifInfoEntityBranchLinkMap::iterator blm_it; + for (blm_it = entity_branches_.begin(); + blm_it != entity_branches_.end(); ++blm_it) { + // We assume that one chain only comes from one entity, so we go with one + // editor per chain + std::vector<MMCifInfoEntityBranchLink>::iterator blv_it = + blm_it->second.begin(); + if (blv_it != blm_it->second.end()) { + mol::XCSEditor editor = blv_it->GetAtom1().GetEntity().EditXCS(); + for (; blv_it != blm_it->second.end(); ++blv_it) { + blv_it->ConnectBranchLink(editor); + } + } + } +} + +std::ostream& operator<<(std::ostream& os, const MMCifInfoEntityBranchLink& eb) +{ + os << "<MMCifInfoEntityBranchLink atom1:" << eb.GetAtom1() << " atom2:" + << eb.GetAtom2() << ">"; + return os; +} + +std::ostream& operator<<(std::ostream& os, + const std::vector<MMCifInfoEntityBranchLink>& eb_list) +{ + os << "<MMCifInfoEntityBranchLinkList"; + std::vector<MMCifInfoEntityBranchLink>::const_iterator bl_it; + for (bl_it = eb_list.begin(); bl_it != eb_list.end(); ++bl_it) { + os << " <atom1:" << bl_it->GetAtom1() << " atom2:" + << bl_it->GetAtom2() << ">"; + } + os << ">"; + return os; +} + }} //ns diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh index 
c152229ef838e7ce1d0abb4d39688112d3662def..b97949105b4818777d0650d8ba59f45e8f05e8ab 100644 --- a/modules/io/src/mol/mmcif_info.hh +++ b/modules/io/src/mol/mmcif_info.hh @@ -26,6 +26,7 @@ #include <ost/geom/geom.hh> #include <ost/string_ref.hh> #include <ost/io/module_config.hh> +#include <ost/mol/mol.hh> namespace ost { namespace io { @@ -918,6 +919,45 @@ private: String details_; }; +/// \brief Store information on branched structures (oligosaccharides) +/// +class DLLEXPORT_OST_IO MMCifInfoEntityBranchLink { +public: + MMCifInfoEntityBranchLink(mol::AtomHandle atom1, + mol::AtomHandle atom2, + unsigned char bond_order): +atom1_(atom1), atom2_(atom2), bond_order_(bond_order) {} + mol::AtomHandle GetAtom1() const { return atom1_;} + mol::AtomHandle GetAtom2() const { return atom2_; } + unsigned char GetBondOrder() const { return bond_order_; } + void SetAtom1(mol::AtomHandle atom) { atom1_ = atom; } + void SetAtom2(mol::AtomHandle atom) { atom2_ = atom; } + void SetBondOrder(unsigned char bond_order) { bond_order_ = bond_order; } + void ConnectBranchLink(mol::XCSEditor editor) { + editor.Connect(atom1_, atom2_, bond_order_); + } + + bool operator==(const MMCifInfoEntityBranchLink& eb) const { + if (this->atom1_ != eb.atom1_) { + return false; + } + if (this->atom2_ != eb.atom2_) { + return false; + } + return true; + } + + bool operator!=(const MMCifInfoEntityBranchLink& eb) const { + return !this->operator == (eb); + } + +private: + mol::AtomHandle atom1_; + mol::AtomHandle atom2_; + unsigned char bond_order_; +}; +typedef std::map<String, std::vector<MMCifInfoEntityBranchLink> > MMCifInfoEntityBranchLinkMap; + /// \brief container class for additional information from MMCif files /// /// \section mmcif annotation information @@ -1126,6 +1166,39 @@ public: { return revisions_; } + + /// \brief Add bond information for a branched entity + /// + /// \param chain_name chain the bond belongs to + /// \param atom1 first atom of the bond + /// \param atom2 second atom of 
the bond + void AddEntityBranchLink(String chain_name, + mol::AtomHandle atom1, + mol::AtomHandle atom2, + unsigned char bond_order); + + /// \brief Get all links for all branched entities + /// + const std::vector<MMCifInfoEntityBranchLink> GetEntityBranchLinks() const; + + /// \brief Check if a chain is a branched entity and return it + /// + /// \param chain_name Name of the chain to check + const std::vector<MMCifInfoEntityBranchLink> GetEntityBranchByChain( + const String& chain_name) const; + + /// \brief Get the names of all chains of branched entities. + /// + const std::vector<String> GetEntityBranchChainNames() const; + + /// \brief Get the all chains of branched entities. + /// + const mol::ChainHandleList GetEntityBranchChains() const; + + /// \brief Connect all atoms listed as links for branched entities. + /// + void ConnectBranchLinks(); + //protected: private: @@ -1140,13 +1213,18 @@ private: std::vector<MMCifInfoCitation> citations_; ///< list of citations std::vector<MMCifInfoBioUnit> biounits_; ///< list of biounits std::vector<MMCifInfoTransOpPtr> transops_; - MMCifInfoStructRefs struct_refs_; + MMCifInfoStructRefs struct_refs_; std::map<String, String> cif_2_pdb_chain_id_; std::map<String, String> pdb_2_cif_chain_id_; std::map<String, String> cif_2_entity_id_; + std::map<String, std::vector<MMCifInfoEntityBranchLink> > entity_branches_; }; +DLLEXPORT_OST_IO std::ostream& operator<<(std::ostream& os, + const MMCifInfoEntityBranchLink& eb); +DLLEXPORT_OST_IO std::ostream& operator<<(std::ostream& os, + const std::vector<MMCifInfoEntityBranchLink>& eb_list); }} // ns #endif diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 6392ef4b11cc5f3eaff1fbd23536f0ffe03586b5..e2f67b11973a381ce034836d98a54f808cdfcd77 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -90,6 +90,7 @@ void MMCifReader::ClearState() revisions_.clear(); revision_types_.clear(); database_PDB_rev_added_ 
= false; + entity_branch_link_map_.clear(); } void MMCifReader::SetRestrictChains(const String& restrict_chains) @@ -362,7 +363,30 @@ bool MMCifReader::OnBeginLoop(const StarLoopDesc& header) indices_[PDS_RECVD_INITIAL_DEPOSITION_DATE] = header.GetIndex("recvd_initial_deposition_date"); cat_available = true; - } + } else if (header.GetCategory() == "pdbx_entity_branch") { + category_ = PDBX_ENTITY_BRANCH; + // mandatory + this->TryStoreIdx(BR_ENTITY_ID, "entity_id", header); + this->TryStoreIdx(BR_ENTITY_TYPE, "type", header); + cat_available = true; + } else if (header.GetCategory() == "pdbx_entity_branch_link") { + category_ = PDBX_ENTITY_BRANCH_LINK; + // mandatory + this->TryStoreIdx(BL_ENTITY_ID, "entity_id", header); + this->TryStoreIdx(BL_ATOM_ID_1, "atom_id_1", header); + this->TryStoreIdx(BL_ATOM_ID_2, "atom_id_2", header); + this->TryStoreIdx(BL_COMP_ID_1, "comp_id_1", header); + this->TryStoreIdx(BL_COMP_ID_2, "comp_id_2", header); + this->TryStoreIdx(BL_ENTITY_BRANCH_LIST_NUM_1, "entity_branch_list_num_1", + header); + this->TryStoreIdx(BL_ENTITY_BRANCH_LIST_NUM_2, "entity_branch_list_num_2", + header); + // optional items + indices_[BL_ATOM_STEREO_CONFIG_1] = header.GetIndex("atom_stereo_config_1"); + indices_[BL_ATOM_STEREO_CONFIG_2] = header.GetIndex("atom_stereo_config_2"); + indices_[BL_VALUE_ORDER] = header.GetIndex("value_order"); + cat_available = true; + } category_counts_[category_]++; return cat_available; } @@ -636,47 +660,50 @@ void MMCifReader::ParseAndAddAtom(const std::vector<StringRef>& columns) columns[indices_[GROUP_PDB]][0]=='H'); } +MMCifReader::MMCifEntityDescMap::iterator MMCifReader::GetEntityDescMapIterator( + const String& entity_id) +{ + MMCifEntityDescMap::iterator edm_it = entity_desc_map_.find(entity_id); + // if the entity ID is not already stored, insert it with empty values + if (edm_it == entity_desc_map_.end()) { + MMCifEntityDesc desc = {.type=mol::CHAINTYPE_N_CHAINTYPES, + .details="", + .seqres=""}; + edm_it = 
entity_desc_map_.insert(entity_desc_map_.begin(), + MMCifEntityDescMap::value_type(entity_id, + desc)); + } + return edm_it; +} + void MMCifReader::ParseEntity(const std::vector<StringRef>& columns) { - bool store = false; // is it worth storing this record? - MMCifEntityDesc desc; + MMCifEntityDescMap::iterator edm_it = + GetEntityDescMapIterator(columns[indices_[E_ID]].str()); // type if (indices_[E_TYPE] != -1) { - desc.type = mol::ChainTypeFromString(columns[indices_[E_TYPE]]); - store = true; + // only use the entity type if no other is set, entity_poly type is + // more precise, so if that was set before just leave it in + if (edm_it->second.type == mol::CHAINTYPE_N_CHAINTYPES) { + edm_it->second.type = mol::ChainTypeFromString(columns[indices_[E_TYPE]]); + } + } else { + // don't deal with entities without type + entity_desc_map_.erase(edm_it); + return; } // description if (indices_[PDBX_DESCRIPTION] != -1) { - desc.details = columns[indices_[PDBX_DESCRIPTION]].str(); - } else { - desc.details = ""; - } - - if (store) { - desc.seqres = ""; - entity_desc_map_.insert( - MMCifEntityDescMap::value_type(columns[indices_[E_ID]].str(), - desc) - ); + edm_it->second.details = columns[indices_[PDBX_DESCRIPTION]].str(); } } void MMCifReader::ParseEntityPoly(const std::vector<StringRef>& columns) { - // we assume that the entity cat. ALWAYS comes before the entity_poly cat. 
- // search entity MMCifEntityDescMap::iterator edm_it = - entity_desc_map_.find(columns[indices_[ENTITY_ID]].str()); - - if (edm_it == entity_desc_map_.end()) { - throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, - "'entity_poly' category defined before 'entity' for id '" + - columns[indices_[ENTITY_ID]].str() + - "' or missing.", - this->GetCurrentLinenum())); - } + GetEntityDescMapIterator(columns[indices_[ENTITY_ID]].str()); // store type if (indices_[EP_TYPE] != -1) { @@ -1541,6 +1568,14 @@ void MMCifReader::OnDataRow(const StarLoopDesc& header, LOG_TRACE("processing pdbx_database_status entry"); this->ParsePdbxDatabaseStatus(columns); break; + case PDBX_ENTITY_BRANCH: + LOG_TRACE("processing pdbx_entity_branch entry"); + this->ParsePdbxEntityBranch(columns); + break; + case PDBX_ENTITY_BRANCH_LINK: + LOG_TRACE("processing pdbx_entity_branch_link entry"); + this->ParsePdbxEntityBranchLink(columns); + break; default: throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, "Uncatched category '"+ header.GetCategory() +"' found.", @@ -1679,6 +1714,68 @@ void MMCifReader::ParseStructRefSeqDif(const std::vector<StringRef>& columns) } } +void MMCifReader::ParsePdbxEntityBranch(const std::vector<StringRef>& columns) +{ + // get the entity description entry (inserted with empty values if missing) + MMCifEntityDescMap::iterator edm_it = + GetEntityDescMapIterator(columns[indices_[BR_ENTITY_ID]].str()); + + // store type, read through this category's own index (not entity_poly's) + if (indices_[BR_ENTITY_TYPE] != -1) { + edm_it->second.type = mol::ChainTypeFromString(columns[indices_[BR_ENTITY_TYPE]]); + } +} + +void MMCifReader::ParsePdbxEntityBranchLink(const std::vector<StringRef>& columns) +{ + MMCifPdbxEntityBranchLink link_pair; + + String entity_id(columns[indices_[BL_ENTITY_ID]].str()); + + // list of entities -> pairs of info for link + link_pair.res_num_1 = + this->TryGetInt(columns[indices_[BL_ENTITY_BRANCH_LIST_NUM_1]], + "pdbx_entity_branch_link.entity_branch_list_num_1"); + link_pair.cmp_1 = columns[indices_[BL_COMP_ID_1]].str(); + link_pair.atm_nm_1 =
columns[indices_[BL_ATOM_ID_1]].str(); + link_pair.res_num_2 = + this->TryGetInt(columns[indices_[BL_ENTITY_BRANCH_LIST_NUM_2]], + "pdbx_entity_branch_link.entity_branch_list_num_2"); + link_pair.cmp_2 = columns[indices_[BL_COMP_ID_2]].str(); + link_pair.atm_nm_2 = columns[indices_[BL_ATOM_ID_2]].str(); + + /*if (indices_[BL_ATOM_STEREO_CONFIG_1] != -1) { + char A = *columns[indices_[BL_ATOM_STEREO_CONFIG_1]].begin(); + }*/ + // check stereo values to be N S R + /*if (indices_[BL_ATOM_STEREO_CONFIG_2] != -1) { + }*/ + // check value order + if (indices_[BL_VALUE_ORDER] != -1) { + link_pair.bond_order = MMCifValueOrderToOSTBondOrder( + columns[indices_[BL_VALUE_ORDER]]); + } else { + link_pair.bond_order = 1; + } + + std::pair<MMCifPdbxEntityBranchLinkMap::iterator, bool> rit; + + // check if element already exists + MMCifPdbxEntityBranchLinkMap::iterator blm_it = + entity_branch_link_map_.find(entity_id); + + // if the entity was not seen before, create it in the map + if (blm_it == entity_branch_link_map_.end()) { + rit = entity_branch_link_map_.insert( + MMCifPdbxEntityBranchLinkMap::value_type(entity_id, + std::vector<MMCifPdbxEntityBranchLink>())); + blm_it = rit.first; + } + + // add the link pair + blm_it->second.push_back(link_pair); +} + void MMCifReader::OnEndData() { mol::XCSEditor editor=ent_handle_.EditXCS(mol::BUFFERED_EDIT); @@ -1686,10 +1783,12 @@ void MMCifReader::OnEndData() // process chain types std::vector<std::pair<mol::ChainHandle, String> >::const_iterator css; MMCifEntityDescMap::const_iterator edm_it; + MMCifPdbxEntityBranchLinkMap::const_iterator blm_it; + std::vector<MMCifPdbxEntityBranchLink>::const_iterator bl_it; String pdb_auth_chain_name; for (css = chain_id_pairs_.begin(); css != chain_id_pairs_.end(); ++css) { + // chain description edm_it = entity_desc_map_.find(css->second); - if (edm_it != entity_desc_map_.end()) { editor.SetChainType(css->first, edm_it->second.type); editor.SetChainDescription(css->first, 
edm_it->second.details); @@ -1713,6 +1812,22 @@ void MMCifReader::OnEndData() LOG_WARNING("No entity description found for atom_site.label_entity_id '" << css->second << "'"); } + // find + blm_it = entity_branch_link_map_.find(css->second); + // store linker pair + if (blm_it != entity_branch_link_map_.end()) { + for (bl_it = blm_it->second.begin(); bl_it != blm_it->second.end(); + ++bl_it) { + mol::ResidueHandle res1 = css->first.FindResidue(to_res_num( + bl_it->res_num_1, ' ')); + mol::ResidueHandle res2 = css->first.FindResidue(to_res_num( + bl_it->res_num_2, ' ')); + info_.AddEntityBranchLink(css->first.GetName(), + res1.FindAtom(bl_it->atm_nm_1), + res2.FindAtom(bl_it->atm_nm_2), + bl_it->bond_order); + } + } } // process citations (couple with authors @@ -1807,4 +1922,64 @@ void MMCifReader::OnEndData() << strand_list_.size() << " strands"); } +unsigned char MMCifValueOrderToOSTBondOrder(const StringRef value_order) +{ + if (value_order == StringRef("sing", 4)) { + return 1; + } + if (value_order == StringRef("doub", 4)) { + return 2; + } + if (value_order == StringRef("trip", 4)) { + return 3; + } + LOG_WARNING("Non-covered bond order found: '" << value_order << "'"); + if (value_order == StringRef("arom", 4)) { + return 4; + } + if (value_order == StringRef("delo", 4)) { + return 5; + } + if (value_order == StringRef("pi", 2)) { + return 6; + } + if (value_order == StringRef("poly", 4)) { + return 7; + } + if (value_order == StringRef("quad", 4)) { + return 8; + } + return 1; +} + +String OSTBondOrderToMMCifValueOrder(const unsigned char bond_order) +{ + if (bond_order == 1) { + return String("sing"); + } + if (bond_order == 2) { + return String("doub"); + } + if (bond_order == 3) { + return String("trip"); + } + if (bond_order == 4) { + return String("arom"); + } + if (bond_order == 5) { + return String("delo"); + } + if (bond_order == 6) { + return String("pi"); + } + if (bond_order == 7) { + return String("poly"); + } + if (bond_order == 8) { + return 
String("quad"); + } + LOG_WARNING("Unknown bond order found: '" << (int)bond_order << "'"); + return String(""); +} + }} diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index 9147a234c4f4966b4ac861f45c48f0a2e7bee829..c6e2e32fdbfef23eba378f9e6efbffb7ac3a79df 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -56,6 +56,8 @@ namespace ost { namespace io { /// \li struct_sheet_range /// \li pdbx_database_PDB_obs_spr /// \li database_PDB_rev +/// \li pdbx_entity_branch +/// \li pdbx_entity_branch_link class DLLEXPORT_OST_IO MMCifReader : public StarParser { public: /// \brief create a MMCifReader @@ -326,6 +328,16 @@ protected: /// \param columns data row void ParsePdbxDatabasePdbObsSpr(const std::vector<StringRef>& columns); + /// \brief Fetch mmCIF pdbx_entity_branch information + /// + /// \param columns data row + void ParsePdbxEntityBranch(const std::vector<StringRef>& columns); + + /// \brief Fetch mmCIF pdbx_entity_branch_link information + /// + /// \param columns data row + void ParsePdbxEntityBranchLink(const std::vector<StringRef>& columns); + /// \struct types of secondary structure typedef enum { MMCIF_HELIX, @@ -561,6 +573,26 @@ private: PDS_RECVD_INITIAL_DEPOSITION_DATE, ///< date of initial deposition } PdbxDatabaseStatusItems; + /// \enum items of the pdbx_entity_branch category (counterpart to entity_poly) + typedef enum { + BR_ENTITY_ID, ///< pointer to entity.id + BR_ENTITY_TYPE ///< type of branched molecular entity + } EntityBranchItems; + + /// \enum items of the pdbx_entity_branch_link category + typedef enum { + BL_ENTITY_ID, ///< pointer to entity.id + BL_ATOM_ID_1, ///< atom identifier (element + number) + BL_ATOM_ID_2, ///< atom identifier (element + number) + BL_COMP_ID_1, ///< tlc of component + BL_COMP_ID_2, ///< tlc of component + BL_ENTITY_BRANCH_LIST_NUM_1, ///< res. no. (pdbx_entity_branch_list.num) + BL_ENTITY_BRANCH_LIST_NUM_2, ///< res. no.
(pdbx_entity_branch_list.num) + BL_ATOM_STEREO_CONFIG_1, ///< chiral configuration (R/ S) + BL_ATOM_STEREO_CONFIG_2, ///< chiral configuration (R/ S) + BL_VALUE_ORDER ///< bond order + } EntityBranchLinkItems; + /// \enum categories of the mmcif format typedef enum { ATOM_SITE, @@ -584,6 +616,8 @@ private: PDBX_AUDIT_REVISION_HISTORY, PDBX_AUDIT_REVISION_DETAILS, PDBX_DATABASE_STATUS, + PDBX_ENTITY_BRANCH, + PDBX_ENTITY_BRANCH_LINK, DONT_KNOW } MMCifCategory; @@ -595,6 +629,11 @@ private: } MMCifEntityDesc; typedef std::map<String, MMCifEntityDesc> MMCifEntityDescMap; + /// \brief Get an iterator for MMCifEntityDescMap by finding an element or + /// inserting a new one into the map. + /// \param entity_id ID of the entity to talk to + MMCifEntityDescMap::iterator GetEntityDescMapIterator(const String& entity_id); + /// \struct assembly information typedef struct { String biounit_id; ///< identifier for the bu @@ -638,6 +677,19 @@ private: int minor; ///< minor version }; + /// \struct to keep entity_branch linker pairs while parsing + typedef struct { + int res_num_1; ///< index of the linked residue relative to its chain + String cmp_1; ///< tlc of residue (sugar) + String atm_nm_1; ///< (IUPAC) name of the linking atom + int res_num_2; ///< index of the linked residue relative to its chain + String cmp_2; ///< tlc of residue (sugar) + String atm_nm_2; ///< (IUPAC) name of the linking atom + unsigned char bond_order; ///< ID of value_order as OST bond_order + } MMCifPdbxEntityBranchLink; + typedef std::map<String, std::vector<MMCifPdbxEntityBranchLink> > + MMCifPdbxEntityBranchLinkMap; + // members MMCifCategory category_; int category_counts_[DONT_KNOW+1]; ///< overall no.
of atom_site loops @@ -673,8 +725,24 @@ private: std::vector<MMCifRevisionDesc> revisions_; std::map<int, String> revision_types_; bool database_PDB_rev_added_; + // for entity_branch connections + MMCifPdbxEntityBranchLinkMap entity_branch_link_map_; }; +/// \brief Translate mmCIF info on bond type (e.g. +/// pdbx_entity_branch_link.value_order) to OST bond_order +/// +/// \param value_order abbreviation determining the bond order +DLLEXPORT_OST_IO unsigned char MMCifValueOrderToOSTBondOrder( + const StringRef value_order); + +/// \brief Translate an OST bond_order to mmCIF value_order +/// +/// \param bond_order OST bond order +DLLEXPORT_OST_IO String OSTBondOrderToMMCifValueOrder( + const unsigned char bond_order); }} #endif + +// LocalWords: MMCifEntityDescMap diff --git a/modules/io/tests/test_io_mmcif.py b/modules/io/tests/test_io_mmcif.py index cfa10798c248f5d58385fcee070088ea138f0566..20162281d3584abb31dfb024edb01c50baebe068 100644 --- a/modules/io/tests/test_io_mmcif.py +++ b/modules/io/tests/test_io_mmcif.py @@ -263,6 +263,55 @@ class TestMMCifInfo(unittest.TestCase): self.assertEqual(len(crambin_pdb.residues), 46) self.assertEqual(len(crambin_pdb.atoms), 327) + def test_mmcifinfo_entitybranch(self): + # test MMCifInfoEntityBranchLink + eh = mol.CreateEntity() + editor = eh.EditXCS(); + ch = editor.InsertChain("A"); + res1 = editor.AppendResidue(ch, "BMA"); + res2 = editor.AppendResidue(ch, "MAN"); + atom1 = editor.InsertAtom(res2, "C1", geom.Vec3()); + atom2 = editor.InsertAtom(res1, "O3", geom.Vec3()); + branch = io.MMCifInfoEntityBranchLink(atom1, atom2, 1) + self.assertEqual(branch.atom1.qualified_name, "A.MAN2.C1") + self.assertEqual(branch.bond_order, 1) + + branch.ConnectBranchLink(editor) + self.assertEqual(atom2.GetBondPartners()[0].qualified_name, "A.MAN2.C1") + + # test entity_branches_ + ch = editor.InsertChain("B"); + res1 = editor.AppendResidue(ch, "NAG"); + res2 = editor.AppendResidue(ch, "NAG"); + atom3 = editor.InsertAtom(res2, "C1",
geom.Vec3()); + atom4 = editor.InsertAtom(res1, "O4", geom.Vec3()); + info = io.MMCifInfo() + info.AddEntityBranchLink("A", atom1, atom2, 1) + info.AddEntityBranchLink(ch.name, atom3, atom4, 1) + + blinks = info.GetEntityBranchLinks() + self.assertEqual(blinks[0].GetAtom1().qualified_name, "A.MAN2.C1") + self.assertEqual(blinks[0].atom2.qualified_name, "A.BMA1.O3") + self.assertEqual(blinks[0].GetBondOrder(), 1) + self.assertEqual(blinks[1].atom1.qualified_name, "B.NAG2.C1") + self.assertEqual(blinks[1].GetAtom2().qualified_name, "B.NAG1.O4") + self.assertEqual(blinks[1].GetBondOrder(), 1) + + info.ConnectBranchLinks() + self.assertEqual(atom4.GetBondPartners()[0].qualified_name, "B.NAG2.C1") + + chain_names = info.GetEntityBranchChainNames() + self.assertEqual(chain_names, ['A', 'B']) + chains = info.GetEntityBranchChains() + self.assertEqual(chains[0].name, 'A') + self.assertEqual(chains[1].name, 'B') + + blinks = info.GetEntityBranchByChain('B') + self.assertEqual(len(blinks), 1) + self.assertEqual(blinks[0].atom1.qualified_name, "B.NAG2.C1") + blinks = info.GetEntityBranchByChain('C') + self.assertEqual(len(blinks), 0) + if __name__== '__main__': from ost import testutils testutils.RunTests() diff --git a/modules/io/tests/test_mmcif_info.cc b/modules/io/tests/test_mmcif_info.cc index efb35d54ff84a5772b7193ba0a0c2382880585c2..5370d2ce1eeb733bbba23b2ff52fadaf2cd22274 100644 --- a/modules/io/tests/test_mmcif_info.cc +++ b/modules/io/tests/test_mmcif_info.cc @@ -22,6 +22,7 @@ #include <ost/io/io_exception.hh> #include <ost/io/mol/mmcif_info.hh> +#include <ost/mol/mol.hh> using namespace ost; using namespace ost::io; @@ -278,6 +279,28 @@ BOOST_AUTO_TEST_CASE(mmcif_info_revisions) BOOST_TEST_MESSAGE(" done."); } +BOOST_AUTO_TEST_CASE(mmcif_info_branch) +{ + BOOST_TEST_MESSAGE(" Running mmcif_info_branch tests..."); + + // create a dummy entity to start an editor... 
+ mol::EntityHandle eh = mol::CreateEntity(); + mol::XCSEditor editor = eh.EditXCS(); + mol::ChainHandle ch = editor.InsertChain("A"); + mol::ResidueHandle res1 = editor.AppendResidue(ch, "NAG"); + mol::ResidueHandle res2 = editor.AppendResidue(ch, "NAG"); + // create AtomHandles for testing + mol::AtomHandle atom1 = editor.InsertAtom(res2, "C1",geom::Vec3()); + mol::AtomHandle atom2 = editor.InsertAtom(res1, "O4",geom::Vec3()); + + MMCifInfoEntityBranchLink branch1(atom1, atom2, 1); + BOOST_CHECK(branch1.GetAtom1().GetQualifiedName() == "A.NAG2.C1"); + BOOST_CHECK(branch1.GetAtom2().GetQualifiedName() == "A.NAG1.O4"); + BOOST_CHECK(branch1.GetBondOrder() == 1); + + BOOST_TEST_MESSAGE(" done."); +} + BOOST_AUTO_TEST_CASE(mmcif_info) { BOOST_TEST_MESSAGE(" Running mmcif_info tests..."); @@ -311,6 +334,56 @@ BOOST_AUTO_TEST_CASE(mmcif_info) BOOST_CHECK(info.GetRevisions().GetSize() == 0); + // simple check that we can add branch links + mol::EntityHandle eh = mol::CreateEntity(); + mol::XCSEditor editor = eh.EditXCS(); + mol::ChainHandle ch1 = editor.InsertChain("A"); + mol::ResidueHandle res11 = editor.AppendResidue(ch1, "NAG"); + mol::ResidueHandle res12 = editor.AppendResidue(ch1, "NAG"); + // create AtomHandles for testing + mol::AtomHandle atom11 = editor.InsertAtom(res12, "C1",geom::Vec3()); + mol::AtomHandle atom12 = editor.InsertAtom(res11, "O4",geom::Vec3()); + mol::ChainHandle ch2 = editor.InsertChain("B"); + mol::ResidueHandle res21 = editor.AppendResidue(ch2, "BMA"); + mol::ResidueHandle res22 = editor.AppendResidue(ch2, "MAN"); + // create AtomHandles for testing + mol::AtomHandle atom21 = editor.InsertAtom(res22, "C1",geom::Vec3()); + mol::AtomHandle atom22 = editor.InsertAtom(res21, "O3",geom::Vec3()); + info.AddEntityBranchLink(ch1.GetName(), atom11, atom12, 1); + info.AddEntityBranchLink(ch2.GetName(), atom21, atom22, 1); + std::vector<MMCifInfoEntityBranchLink> blinks = info.GetEntityBranchLinks(); + + BOOST_CHECK(blinks.size() == 2); + 
BOOST_CHECK(blinks[0].GetAtom1().GetQualifiedName() == "A.NAG2.C1"); + BOOST_CHECK(blinks[0].GetAtom2().GetQualifiedName() == "A.NAG1.O4"); + BOOST_CHECK(blinks[0].GetBondOrder() == 1); + BOOST_CHECK(blinks[1].GetAtom1().GetQualifiedName() == "B.MAN2.C1"); + BOOST_CHECK(blinks[1].GetAtom2().GetQualifiedName() == "B.BMA1.O3"); + BOOST_CHECK(blinks[1].GetBondOrder() == 1); + + // check that branch links get bonds + info.ConnectBranchLinks(); + + BOOST_CHECK(atom11.GetBondPartners()[0] == atom12); + BOOST_CHECK(atom22.GetBondPartners()[0] == atom21); + + // check chain(name) retrieval works + std::vector<String> chain_names = info.GetEntityBranchChainNames(); + BOOST_CHECK(chain_names[0] == "A"); + BOOST_CHECK(chain_names[1] == "B"); + + // check chain(handle) retrieval works + mol::ChainHandleList chains = info.GetEntityBranchChains(); + BOOST_CHECK(chains[0].GetName() == "A"); + BOOST_CHECK(chains[1].GetName() == "B"); + + // check retrieval of links by chain name + std::vector<MMCifInfoEntityBranchLink> cblinks = + info.GetEntityBranchByChain("A"); + BOOST_CHECK(cblinks.size() == 1); + cblinks = info.GetEntityBranchByChain("C"); + BOOST_CHECK(cblinks.size() == 0); + BOOST_TEST_MESSAGE(" done."); } diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc index 21efd569f65aa472f69954a19d96ffc683cc4d16..b6fa90f1ffcc8cc9bc2491b6964ae6fd08b7b489 100644 --- a/modules/io/tests/test_mmcif_reader.cc +++ b/modules/io/tests/test_mmcif_reader.cc @@ -61,6 +61,7 @@ public: using MMCifReader::ParseStruct; using MMCifReader::ParseStructConf; using MMCifReader::ParseStructSheetRange; + using MMCifReader::ParsePdbxEntityBranch; using MMCifReader::TryStoreIdx; using MMCifReader::SetRestrictChains; using MMCifReader::SetReadSeqRes; @@ -310,9 +311,13 @@ BOOST_AUTO_TEST_CASE(mmcif_unknown_entity_type) columns.push_back(StringRef("polymer", 7)); BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntity(columns)); columns.pop_back(); + columns.pop_back(); + 
columns.push_back(StringRef("2", 1)); columns.push_back(StringRef("non-polymer", 11)); BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntity(columns)); columns.pop_back(); + columns.pop_back(); + columns.push_back(StringRef("3", 1)); columns.push_back(StringRef("water", 5)); BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntity(columns)); BOOST_TEST_MESSAGE(" done."); @@ -320,6 +325,8 @@ BOOST_AUTO_TEST_CASE(mmcif_unknown_entity_type) // negative BOOST_TEST_MESSAGE(" unknown type..."); columns.pop_back(); + columns.pop_back(); + columns.push_back(StringRef("4", 1)); columns.push_back(StringRef("foo", 3)); BOOST_CHECK_THROW(tmmcif_p.ParseEntity(columns), Error); BOOST_TEST_MESSAGE(" done."); @@ -403,20 +410,6 @@ BOOST_AUTO_TEST_CASE(mmcif_entity_poly_tests) seq::SequenceHandle curr = seqres.FindSequence("A"); BOOST_CHECK(curr.GetString() == "VTI"); - BOOST_TEST_MESSAGE(" testing missing corresponding entity entry..."); - { - mol::EntityHandle eh = mol::CreateEntity(); - std::vector<StringRef> columns; - TestMMCifReaderProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh); - - tmmcif_h.SetCategory(StringRef("entity_poly", 11)); - tmmcif_h.Add(StringRef("entity_id", 9)); - tmmcif_p.OnBeginLoop(tmmcif_h); - - columns.push_back(StringRef("1", 1)); - BOOST_CHECK_THROW(tmmcif_p.ParseEntityPoly(columns), IOException); - } - BOOST_TEST_MESSAGE(" done."); BOOST_TEST_MESSAGE(" testing type recognition..."); { TestMMCifReaderProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh); @@ -467,9 +460,8 @@ columns.push_back(StringRef("polydeoxyribonucleotide/polyribonucleotide hybrid", columns.push_back(StringRef("other", 5)); BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns)); columns.pop_back(); - columns.pop_back(); columns.push_back(StringRef("badbadprion", 11)); - BOOST_CHECK_THROW(tmmcif_p.ParseEntityPoly(columns), IOException); + BOOST_CHECK_THROW(tmmcif_p.ParseEntityPoly(columns), ost::Error); columns.pop_back(); } BOOST_TEST_MESSAGE(" done."); @@ -1424,4 +1416,50 @@ 
BOOST_AUTO_TEST_CASE(mmcif_test_revisions_new) BOOST_TEST_MESSAGE(" done."); } +BOOST_AUTO_TEST_CASE(mmcif_pdbx_entity_branch_tests) +{ + BOOST_TEST_MESSAGE(" Running mmcif_pdbx_entity_branch_tests..."); + IOProfile profile; + StarLoopDesc tmmcif_h; + + mol::EntityHandle eh = mol::CreateEntity(); + MMCifReader mmcif_p("testfiles/mmcif/atom_site.mmcif", eh, profile); + + mmcif_p.Parse(); + + BOOST_TEST_MESSAGE(" testing chain type recognition..."); + { + TestMMCifReaderProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh); + std::vector<StringRef> columns; + + // create corresponding entity entry + tmmcif_h.Clear(); + tmmcif_h.SetCategory(StringRef("entity", 6)); + tmmcif_h.Add(StringRef("id", 2)); + tmmcif_h.Add(StringRef("type", 4)); + tmmcif_p.OnBeginLoop(tmmcif_h); + columns.push_back(StringRef("1", 1)); + columns.push_back(StringRef("branched", 8)); + tmmcif_p.ParseEntity(columns); + columns.pop_back(); + columns.pop_back(); + + // build dummy pdbx_entity_branch header + tmmcif_h.Clear(); + tmmcif_h.SetCategory(StringRef("pdbx_entity_branch", 18)); + tmmcif_h.Add(StringRef("entity_id", 9)); + tmmcif_h.Add(StringRef("type", 4)); + tmmcif_p.OnBeginLoop(tmmcif_h); + columns.push_back(StringRef("1", 1)); + columns.push_back(StringRef("oligosaccharide", 15)); + BOOST_CHECK_NO_THROW(tmmcif_p.ParsePdbxEntityBranch(columns)); + columns.pop_back(); + + columns.push_back(StringRef("ordinarysugar", 13)); + BOOST_CHECK_THROW(tmmcif_p.ParsePdbxEntityBranch(columns), ost::Error); + } + BOOST_TEST_MESSAGE(" done."); + BOOST_TEST_MESSAGE(" done."); +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/mol/alg/pymod/export_molck.cc b/modules/mol/alg/pymod/export_molck.cc index 5578bd0504cf39c91f1eab59388be03ac1b92ffb..9671434ba012bf41f2d7494479e905092130cbe8 100644 --- a/modules/mol/alg/pymod/export_molck.cc +++ b/modules/mol/alg/pymod/export_molck.cc @@ -120,7 +120,8 @@ void export_Molck() .def_readwrite("assign_elem", &MolckSettings::assign_elem); 
def("MapNonStandardResidues", &MapNonStandardResidues, (arg("ent"), - arg("lib"))); + arg("lib"), + arg("log_diags")=false)); def("RemoveAtoms", &RemoveAtoms, (arg("ent"), arg("lib"), diff --git a/modules/mol/base/doc/entity.rst b/modules/mol/base/doc/entity.rst index 1e7b008861df783c0970fbc2af4a14975f939f38..3a4d0d292748f1c8f7546468d8f8b4657c9d10c2 100644 --- a/modules/mol/base/doc/entity.rst +++ b/modules/mol/base/doc/entity.rst @@ -1972,6 +1972,7 @@ here. ``CHAINTYPE_POLY_SAC_L``, ``CHAINTYPE_POLY_DN_RN``, ``CHAINTYPE_UNKNOWN``, ``CHAINTYPE_MACROLIDE``, ``CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE``, ``CHAINTYPE_POLY_PEPTIDE_DN_RN``, + ``CHAINTYPE_BRANCHED``, ``CHAINTYPE_OLIGOSACCHARIDE``, ``CHAINTYPE_N_CHAINTYPES`` Where ``CHAINTYPE_N_CHAINTYPES`` holds the number of different types available. diff --git a/modules/mol/base/pymod/export_chain.cc b/modules/mol/base/pymod/export_chain.cc index de8e693029106d8f09fce71a6b85ca115e793a3e..1664c13a33ed03c1fbdc404bbac9b081522c8e4a 100644 --- a/modules/mol/base/pymod/export_chain.cc +++ b/modules/mol/base/pymod/export_chain.cc @@ -137,6 +137,8 @@ void export_Chain() .value("CHAINTYPE_MACROLIDE", CHAINTYPE_MACROLIDE) .value("CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE", CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE) .value("CHAINTYPE_POLY_PEPTIDE_DN_RN", CHAINTYPE_POLY_PEPTIDE_DN_RN) + .value("CHAINTYPE_BRANCHED", CHAINTYPE_BRANCHED) + .value("CHAINTYPE_OLIGOSACCHARIDE", CHAINTYPE_OLIGOSACCHARIDE) .value("CHAINTYPE_N_CHAINTYPES", CHAINTYPE_N_CHAINTYPES) .export_values() ; diff --git a/modules/mol/base/src/chain_type.cc b/modules/mol/base/src/chain_type.cc index 2e553cff8d33428180afffb6edcccb947a762a55..6694aa017ab0c7cf89ba6e8d5cce6d2b495940f2 100644 --- a/modules/mol/base/src/chain_type.cc +++ b/modules/mol/base/src/chain_type.cc @@ -34,6 +34,8 @@ ChainType ChainTypeFromString(StringRef identifier) return CHAINTYPE_WATER; } else if (StringRef("macrolide", 9) == identifier) { return CHAINTYPE_MACROLIDE; + } else if (StringRef("branched", 8) == identifier) { + 
return CHAINTYPE_BRANCHED; // chain types as found in the entity_poly category of a mmcif file } else if (StringRef("polypeptide(D)", 14) == identifier) { return CHAINTYPE_POLY_PEPTIDE_D; @@ -54,6 +56,8 @@ ChainType ChainTypeFromString(StringRef identifier) return CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE; } else if (StringRef("peptide nucleic acid", 20) == identifier) { return CHAINTYPE_POLY_PEPTIDE_DN_RN; + } else if (StringRef("oligosaccharide", 15) == identifier) { + return CHAINTYPE_OLIGOSACCHARIDE; } else if (StringRef("other", 5) == identifier) { return CHAINTYPE_UNKNOWN; } @@ -79,6 +83,8 @@ String StringFromChainType(ChainType type) return "water"; } else if (CHAINTYPE_MACROLIDE == type) { return "macrolide"; + } else if (CHAINTYPE_BRANCHED == type) { + return "branched"; // chain types as found in the entity_poly category of a mmcif file } else if (CHAINTYPE_POLY_PEPTIDE_D == type) { return "polypeptide(D)"; @@ -98,6 +104,8 @@ String StringFromChainType(ChainType type) return "cyclic-pseudo-peptide"; } else if (CHAINTYPE_POLY_PEPTIDE_DN_RN == type) { return "peptide nucleic acid"; + } else if (CHAINTYPE_OLIGOSACCHARIDE == type) { + return "oligosaccharide"; } else if (CHAINTYPE_UNKNOWN == type) { return "other"; } diff --git a/modules/mol/base/src/chain_type.hh b/modules/mol/base/src/chain_type.hh index 3338db7fe16a87641b86f0bef3bc00f1b0f701aa..08ce4dcc6462a726a367eaf34708b789d684dcc0 100644 --- a/modules/mol/base/src/chain_type.hh +++ b/modules/mol/base/src/chain_type.hh @@ -44,6 +44,8 @@ typedef enum { CHAINTYPE_MACROLIDE, ///< macrolide CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE, ///< cyclic-pseudo-peptide CHAINTYPE_POLY_PEPTIDE_DN_RN, ///< peptide nucleic acid + CHAINTYPE_BRANCHED, ///< carbohydrate + CHAINTYPE_OLIGOSACCHARIDE, ///< oligosaccharide (branched carbohydrate) CHAINTYPE_N_CHAINTYPES ///< no. 
of chain types } ChainType; diff --git a/modules/mol/base/src/impl/chain_impl.hh b/modules/mol/base/src/impl/chain_impl.hh index feb03df4e2ca827c930a92b4192141cf691b8d8e..a3a7a97dcec720f024c39a2bcf73ff0bc108e9b6 100644 --- a/modules/mol/base/src/impl/chain_impl.hh +++ b/modules/mol/base/src/impl/chain_impl.hh @@ -69,7 +69,8 @@ public: { return type_==CHAINTYPE_POLY || this->IsPolypeptide() || this->IsPolynucleotide() || this->IsPolysaccharide() || - type_==CHAINTYPE_POLY_PEPTIDE_DN_RN; + type_==CHAINTYPE_POLY_PEPTIDE_DN_RN || + type_==CHAINTYPE_OLIGOSACCHARIDE; } /// \brief whether the chain is a polysaccharide bool IsPolysaccharide() const diff --git a/modules/mol/base/tests/test_chain.cc b/modules/mol/base/tests/test_chain.cc index 3fca04ca0c3505922531bc44ed1b60e00fc6f505..2154bdeab94555e73c89892847e6f9cac1759af4 100644 --- a/modules/mol/base/tests/test_chain.cc +++ b/modules/mol/base/tests/test_chain.cc @@ -319,6 +319,18 @@ BOOST_AUTO_TEST_CASE(chain_type) BOOST_CHECK(!ch1.IsPolysaccharide()); BOOST_CHECK(!ch1.IsPolypeptide()); BOOST_CHECK(!ch1.IsPolynucleotide()); + e.SetChainType(ch1, CHAINTYPE_BRANCHED); + BOOST_CHECK(ch1.GetType() == CHAINTYPE_BRANCHED); + BOOST_CHECK(!ch1.IsPolymer()); + BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsPolypeptide()); + BOOST_CHECK(!ch1.IsPolynucleotide()); + e.SetChainType(ch1, CHAINTYPE_OLIGOSACCHARIDE); + BOOST_CHECK(ch1.GetType() == CHAINTYPE_OLIGOSACCHARIDE); + BOOST_CHECK(ch1.IsPolymer()); + BOOST_CHECK(!ch1.IsPolysaccharide()); + BOOST_CHECK(!ch1.IsPolypeptide()); + BOOST_CHECK(!ch1.IsPolynucleotide()); // string -> chain type BOOST_CHECK(ChainTypeFromString("polymer") == CHAINTYPE_POLY); @@ -345,6 +357,9 @@ BOOST_AUTO_TEST_CASE(chain_type) CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE); BOOST_CHECK(ChainTypeFromString("peptide nucleic acid") == CHAINTYPE_POLY_PEPTIDE_DN_RN); + BOOST_CHECK(ChainTypeFromString("branched") == CHAINTYPE_BRANCHED); + BOOST_CHECK(ChainTypeFromString("oligosaccharide") == + 
CHAINTYPE_OLIGOSACCHARIDE); BOOST_CHECK_THROW(ChainTypeFromString("supposed to fail"), Error); @@ -371,6 +386,9 @@ BOOST_AUTO_TEST_CASE(chain_type) "cyclic-pseudo-peptide"); BOOST_CHECK(StringFromChainType(CHAINTYPE_POLY_PEPTIDE_DN_RN) == "peptide nucleic acid"); + BOOST_CHECK(StringFromChainType(CHAINTYPE_BRANCHED) == "branched"); + BOOST_CHECK(StringFromChainType(CHAINTYPE_OLIGOSACCHARIDE) == + "oligosaccharide"); BOOST_CHECK_THROW(StringFromChainType(CHAINTYPE_N_CHAINTYPES), Error); }