diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index da194092927732e1430ca7a63db1b50e45330ac2..7fba2f5c6df41b711f56bc8e8426c99b174ce5a2 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -44,14 +44,18 @@ The following categories of a mmCIF file are considered by the reader: Notes: -* structures in mmCIF format can have two chain names. The "new" chain name +* Structures in mmCIF format can have two chain names. The "new" chain name extracted from ``atom_site.label_asym_id`` is used to name the chains in the :class:`~ost.mol.EntityHandle`. The "old" (author provided) chain name is extracted from ``atom_site.auth_asym_id`` for the first atom of the chain. It is added as string property named "pdb_auth_chain_name" to the - :class:`~ost.mol.ChainHandle` and mapped into :class:`MMCifInfo` as - :meth:`~MMCifInfo.GetMMCifPDBChainTr` & :meth:`~MMCifInfo.GetPDBMMCifChainTr`. - + :class:`~ost.mol.ChainHandle`. The mapping is also stored in + :class:`MMCifInfo` as :meth:`~MMCifInfo.GetMMCifPDBChainTr` and + :meth:`~MMCifInfo.GetPDBMMCifChainTr` if SEQRES records are read in + :func:`~ost.io.LoadMMCIF` and a non-empty SEQRES record exists for that chain + (this should exclude ligands and water). +* Molecular entities in mmCIF are identified by an ``entity.id``. Each chain is + mapped to an ID in :class:`MMCifInfo` as :meth:`~MMCifInfo.GetMMCifEntityIdTr`. Info Classes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -204,15 +208,15 @@ of the annotation available. :param cif_chain_id: atom_site.label_asym_id :type cif_chain_id: :class:`str` - :returns: atom_site.auth_asym_id as :class:`str` + :returns: atom_site.auth_asym_id as :class:`str` (empty if no mapping) .. method:: AddPDBMMCifChainTr(pdb_chain_id, cif_chain_id) Set up a translation for a certain PDB chain name to the mmCIF chain name. 
- :param pdb_chain_id: atom_site.label_asym_id + :param pdb_chain_id: atom_site.auth_asym_id :type pdb_chain_id: :class:`str` - :param cif_chain_id: atom_site.auth_asym_id + :param cif_chain_id: atom_site.label_asym_id :type cif_chain_id: :class:`str` .. method:: GetPDBMMCifChainTr(pdb_chain_id) @@ -221,7 +225,24 @@ of the annotation available. :param pdb_chain_id: atom_site.auth_asym_id :type pdb_chain_id: :class:`str` - :returns: atom_site.label_asym_id as :class:`str` + :returns: atom_site.label_asym_id as :class:`str` (empty if no mapping) + + .. method:: AddMMCifEntityIdTr(cif_chain_id, entity_id) + + Set up a translation for a certain mmCIF chain name to the mmCIF entity ID. + + :param cif_chain_id: atom_site.label_asym_id + :type cif_chain_id: :class:`str` + :param entity_id: atom_site.label_entity_id + :type entity_id: :class:`str` + + .. method:: GetMMCifEntityIdTr(cif_chain_id) + + Get the translation of a certain mmCIF chain name to the mmCIF entity ID. + + :param cif_chain_id: atom_site.label_asym_id + :type cif_chain_id: :class:`str` + :returns: atom_site.label_entity_id as :class:`str` (empty if no mapping) .. 
method:: AddRevision(num, date, status) diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc index 7c6a5b69750a362f01842ad13c52b555353b017a..447761e76bebe047f6fb1fdc674ca8f2cd6d38fa 100644 --- a/modules/io/pymod/export_mmcif_io.cc +++ b/modules/io/pymod/export_mmcif_io.cc @@ -322,6 +322,8 @@ void export_mmcif_io() .def("GetMMCifPDBChainTr", &MMCifInfo::GetMMCifPDBChainTr) .def("AddPDBMMCifChainTr", &MMCifInfo::AddPDBMMCifChainTr) .def("GetPDBMMCifChainTr", &MMCifInfo::GetPDBMMCifChainTr) + .def("AddMMCifEntityIdTr", &MMCifInfo::AddMMCifEntityIdTr) + .def("GetMMCifEntityIdTr", &MMCifInfo::GetMMCifEntityIdTr) .def("SetRevisionsDateOriginal", &MMCifInfo::SetRevisionsDateOriginal) .def("AddRevision", &MMCifInfo::AddRevision) .def("GetRevisions", &MMCifInfo::GetRevisions) diff --git a/modules/io/src/mol/mmcif_info.cc b/modules/io/src/mol/mmcif_info.cc index 27934ad09079feee4a2b7e1e031ad6182dc472d9..3e0fa35370df8d80480920e3f14f5d0f95ddd7ff 100644 --- a/modules/io/src/mol/mmcif_info.cc +++ b/modules/io/src/mol/mmcif_info.cc @@ -58,6 +58,24 @@ String MMCifInfo::GetPDBMMCifChainTr(String pdb) const return tr_it->second; } +void MMCifInfo::AddMMCifEntityIdTr(String cif, String ent_id) +{ + std::map<String, String>::iterator tr_it = cif_2_entity_id_.find(cif); + if (tr_it != cif_2_entity_id_.end()) { + throw IOException("mmCIF chain id '" + cif + "' is already mapped to " + "entity id '" + tr_it->second + "'."); + } + cif_2_entity_id_.insert(std::pair<String, String>(cif, ent_id)); +} + +String MMCifInfo::GetMMCifEntityIdTr(String cif) const +{ + std::map<String, String>::const_iterator tr_it = + cif_2_entity_id_.find(cif); + if (tr_it == cif_2_entity_id_.end()) { return ""; } + return tr_it->second; +} + void MMCifInfo::AddAuthorsToCitation(StringRef id, std::vector<String> list) { // find citation diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh index 
8087e7e0718972ddc1343899af4d69696fda1f3e..8054c160f5386130bc94d181b7a04c81258c6b22 100644 --- a/modules/io/src/mol/mmcif_info.hh +++ b/modules/io/src/mol/mmcif_info.hh @@ -940,6 +940,18 @@ public: /// \return chain name as used in the PDB file (label_asym_id) String GetPDBMMCifChainTr(String pdb) const; + /// \brief Add a new mmCIF chain name / entity ID tuple. + /// + /// \param cif chain name as used by the mmCIF file (label_asym_id) + /// \param ent_id entity ID as used by the mmCIF file (label_entity_id) + void AddMMCifEntityIdTr(String cif, String ent_id); + + /// \brief Get the entity ID for a CIF chain name + /// + /// \param cif chain name as used by the mmCIF file (label_asym_id) + /// \return entity ID as used by the mmCIF file (label_entity_id) + String GetMMCifEntityIdTr(String cif) const; + /// \brief Add a biounit /// /// \param bu biounit to be added @@ -1043,6 +1055,7 @@ private: MMCifInfoStructRefs struct_refs_; std::map<String, String> cif_2_pdb_chain_id_; std::map<String, String> pdb_2_cif_chain_id_; + std::map<String, String> cif_2_entity_id_; }; diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 9e2d95a21750a18f1873937c4a2807e75f8d702e..bbee2014c766b2c8b6589be70a035855c959e7ae 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -506,8 +506,10 @@ void MMCifReader::ParseAndAddAtom(const std::vector<StringRef>& columns) curr_chain_.SetStringProp("pdb_auth_chain_name", auth_chain_name); ++chain_count_; // store entity id + String ent_id = columns[indices_[LABEL_ENTITY_ID]].str(); chain_id_pairs_.push_back(std::pair<mol::ChainHandle,String>(curr_chain_, - columns[indices_[LABEL_ENTITY_ID]].str())); + ent_id)); + info_.AddMMCifEntityIdTr(cif_chain_name, ent_id); } assert(curr_chain_.IsValid()); } else if (chain_id_pairs_.back().second != // unit test diff --git a/modules/io/tests/test_mmcif_info.cc b/modules/io/tests/test_mmcif_info.cc index 
d37b8f396198045128e8e5acce8598cafed9adb4..6c49768d2eb0806473cefe958c7b5d9f9482e79b 100644 --- a/modules/io/tests/test_mmcif_info.cc +++ b/modules/io/tests/test_mmcif_info.cc @@ -269,6 +269,11 @@ BOOST_AUTO_TEST_CASE(mmcif_info) BOOST_CHECK("B" == info.GetPDBMMCifChainTr("A")); BOOST_CHECK("" == info.GetPDBMMCifChainTr("C")); + info.AddMMCifEntityIdTr("A", "1"); + BOOST_CHECK_THROW(info.AddMMCifEntityIdTr("A", "1"), IOException); + BOOST_CHECK("1" == info.GetMMCifEntityIdTr("A")); + BOOST_CHECK("" == info.GetMMCifEntityIdTr("C")); + BOOST_CHECK(info.GetRevisions().GetSize() == 0); BOOST_TEST_MESSAGE(" done."); diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc index 77b2bcd14b9ae79c13a0a56def3af3aecfac7701..c5788c9238b030aa538e2629ce10504da94bb06a 100644 --- a/modules/io/tests/test_mmcif_reader.cc +++ b/modules/io/tests/test_mmcif_reader.cc @@ -97,6 +97,20 @@ void SetAtomSiteHeader(StarLoopDesc* mmcif_h) mmcif_h->Add(StringRef("Cartn_z", 7)); } +conop::CompoundLibPtr SetDefaultCompoundLib() { + // return NULL if not successful, else return newly set default lib + // REQ: OST_ROOT to be set + char * ost_root = getenv("OST_ROOT"); + if (!ost_root) return conop::CompoundLibPtr(); + SetPrefixPath(ost_root); + String lib_path = GetSharedDataPath() + "/compounds.chemlib"; + conop::CompoundLibPtr compound_lib = conop::CompoundLib::Load(lib_path); + if (compound_lib) { + conop::Conopology::Instance().SetDefaultLib(compound_lib); + } + return compound_lib; +} + BOOST_AUTO_TEST_SUITE( io ); BOOST_AUTO_TEST_CASE(mmcif_isvalidpdbident) @@ -140,21 +154,13 @@ BOOST_AUTO_TEST_CASE(mmcif_trystoreidx) BOOST_AUTO_TEST_CASE(mmcif_convert_seqres) { - char * ost_root=getenv("OST_ROOT"); - if(!ost_root){ - std::cout << "WARNING: skipping SEQRES import unit test. 
" - << "Rule-based processor is required" << std::endl; - return; - } - SetPrefixPath(ost_root); - String lib_path=GetSharedDataPath()+"/compounds.chemlib"; - conop::CompoundLibPtr compound_lib=conop::CompoundLib::Load(lib_path); + conop::CompoundLibPtr compound_lib = SetDefaultCompoundLib(); if (!compound_lib) { - std::cout << "WARNING: skipping SEQRES import unit test. Compound " - << "library is required" << std::endl; - return; + std::cout << "WARNING: skipping mmcif_convert_seqres unit test. " + << "Compound library is required" << std::endl; + return; } - conop::Conopology::Instance().SetDefaultLib(compound_lib); + mol::EntityHandle eh=mol::CreateEntity(); TestMMCifReaderProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh); @@ -402,21 +408,12 @@ BOOST_AUTO_TEST_CASE(mmcif_entity_tests) BOOST_AUTO_TEST_CASE(mmcif_entity_poly_tests) { - char * ost_root=getenv("OST_ROOT"); - if(!ost_root){ - std::cout << "WARNING: skipping SEQRES import unit test. " - << "Rule-based processor is required" << std::endl; + if (!SetDefaultCompoundLib()) { + std::cout << "WARNING: skipping mmcif_entity_poly_tests unit test. " + << "Compound library is required" << std::endl; return; } - SetPrefixPath(ost_root); - String lib_path=GetSharedDataPath()+"/compounds.chemlib"; - conop::CompoundLibPtr compound_lib=conop::CompoundLib::Load(lib_path); - if (!compound_lib) { - std::cout << "WARNING: skipping SEQRES import unit test. 
Compound " - << "lib is required" << std::endl; - return; - } - conop::Conopology::Instance().SetDefaultLib(compound_lib); + BOOST_TEST_MESSAGE(" Running mmcif_entity_poly_tests..."); mol::ChainHandle ch; IOProfile profile; @@ -1301,4 +1298,58 @@ BOOST_AUTO_TEST_CASE(mmcif_testreader) BOOST_TEST_MESSAGE(" done."); } +// helper for mmcif_test_chain_mappings +inline void CheckChainMap(mol::EntityHandle eh, const MMCifInfo& info, + const String& cif_name, const String& pdb_name, + bool check_info_map) { + // check chain + mol::ChainHandle ch = eh.FindChain(cif_name); + BOOST_CHECK(ch.IsValid()); + BOOST_CHECK(ch.HasProp("pdb_auth_chain_name")); + BOOST_CHECK(ch.GetStringProp("pdb_auth_chain_name") == pdb_name); + // info mapping + if (check_info_map) { + BOOST_CHECK(info.GetMMCifPDBChainTr(cif_name) == pdb_name); + BOOST_CHECK(info.GetPDBMMCifChainTr(pdb_name) == cif_name); + } +} + +BOOST_AUTO_TEST_CASE(mmcif_test_chain_mappings) +{ + BOOST_TEST_MESSAGE(" Running mmcif_test_chain_mappings tests..."); + + // check compound lib (shared_ptr -> bool needs an explicit conversion) + bool compound_lib_available = static_cast<bool>(SetDefaultCompoundLib()); + + // load data + mol::EntityHandle eh = mol::CreateEntity(); + std::ifstream s("testfiles/mmcif/atom_site.mmcif"); + IOProfile profile; + MMCifReader mmcif_p(s, eh, profile); + if (compound_lib_available) { + mmcif_p.SetReadSeqRes(true); + } + BOOST_REQUIRE_NO_THROW(mmcif_p.Parse()); + const MMCifInfo& info = mmcif_p.GetInfo(); + + // check 1-to-1 mappings + CheckChainMap(eh, info, "A", "A", compound_lib_available); + CheckChainMap(eh, info, "C", "C", compound_lib_available); + CheckChainMap(eh, info, "O", "B", false); // water + CheckChainMap(eh, info, "Z", "Z", compound_lib_available); + + // check entity ID mapping + BOOST_CHECK(info.GetMMCifEntityIdTr("A") == "1"); + BOOST_CHECK(info.GetMMCifEntityIdTr("C") == "1"); + BOOST_CHECK(info.GetMMCifEntityIdTr("O") == "5"); + BOOST_CHECK(info.GetMMCifEntityIdTr("Z") == "1"); + + // check non-existent mappings +
BOOST_CHECK(info.GetMMCifPDBChainTr("B") == ""); + BOOST_CHECK(info.GetPDBMMCifChainTr("O") == ""); + BOOST_CHECK(info.GetMMCifEntityIdTr("B") == ""); + + BOOST_TEST_MESSAGE(" done."); +} + BOOST_AUTO_TEST_SUITE_END();