diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index 758862eeb797c966b2336edd499ba00a930b065a..da1bed9d710d3ec33a4c7dfc16cbc02f6812348e 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -179,6 +179,23 @@ of the annotation available. :type cif_chain_id: :class:`str` :returns: atom_site.auth_asym_id as :class:`str` + .. method:: AddPDBMMCifChainTr(pdb_chain_id, cif_chain_id) + + Set up a translation for a certain PDB chain name to the mmCIF chain name. + + :param pdb_chain_id: atom_site.auth_asym_id + :type pdb_chain_id: :class:`str` + :param cif_chain_id: atom_site.label_asym_id + :type cif_chain_id: :class:`str` + + .. method:: GetPDBMMCifChainTr(pdb_chain_id) + + Get the translation of a certain PDB chain name to the mmCIF chain name. + + :param pdb_chain_id: atom_site.auth_asym_id + :type pdb_chain_id: :class:`str` + :returns: atom_site.label_asym_id as :class:`str` + .. class:: MMCifInfoCitation This stores citation information from an input file. @@ -813,4 +830,4 @@ of the annotation available. .. LocalWords: cas isbn pubmed asu seqres conop ConnectAll casp COMPND OBSLTE .. LocalWords: SPRSDE pdb func autofunction exptl attr pdbx oper conf spr dif .. LocalWords: biounits biounit uniprot UNP seqs AddMMCifPDBChainTr cif asym -.. LocalWords: auth GetMMCifPDBChainTr +.. 
LocalWords: auth GetMMCifPDBChainTr AddPDBMMCifChainTr GetPDBMMCifChainTr diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc index f94c257e647a6499992dea5172169a029ddd8d85..f2527066afca1d2d715c97dcc1413276eafa7a6b 100644 --- a/modules/io/pymod/export_mmcif_io.cc +++ b/modules/io/pymod/export_mmcif_io.cc @@ -282,6 +282,8 @@ void export_mmcif_io() .def("GetObsoleteInfo", &MMCifInfo::GetObsoleteInfo) .def("AddMMCifPDBChainTr", &MMCifInfo::AddMMCifPDBChainTr) .def("GetMMCifPDBChainTr", &MMCifInfo::GetMMCifPDBChainTr) + .def("AddPDBMMCifChainTr", &MMCifInfo::AddPDBMMCifChainTr) + .def("GetPDBMMCifChainTr", &MMCifInfo::GetPDBMMCifChainTr) .add_property("citations", make_function(&MMCifInfo::GetCitations, return_value_policy<copy_const_reference>())) .add_property("biounits", make_function(&MMCifInfo::GetBioUnits, diff --git a/modules/io/src/mol/mmcif_info.cc b/modules/io/src/mol/mmcif_info.cc index 140676fe6c409c03e2da8a427c2a502da2a936c3..29f0db7463188068b3584a49457126d7ec1601d0 100644 --- a/modules/io/src/mol/mmcif_info.cc +++ b/modules/io/src/mol/mmcif_info.cc @@ -40,6 +40,30 @@ String MMCifInfo::GetMMCifPDBChainTr(String cif) const return tr_it->second; } +// Stores the translation of the PDB chain name 'pdb' to the mmCIF chain name +// 'cif'; throws an IOException if 'pdb' already has a translation. +void MMCifInfo::AddPDBMMCifChainTr(String pdb, String cif) +{ + // insert() leaves an existing entry untouched and reports that via .second, + // so a single lookup serves both the duplicate check and the insertion + std::pair<std::map<String, String>::iterator, bool> res = + pdb_2_cif_chain_id_.insert(std::pair<String, String>(pdb, cif)); + if (!res.second) { + throw IOException("PDB chain id '"+ pdb +"' is already mapped to '"+ + res.first->second+"'."); + } +} + +// Returns the mmCIF chain name stored for the PDB chain name 'pdb', or an +// empty string if no translation has been added for it. +String MMCifInfo::GetPDBMMCifChainTr(String pdb) const +{ + std::map<String, String>::const_iterator tr_it = + pdb_2_cif_chain_id_.find(pdb); + if (tr_it == pdb_2_cif_chain_id_.end()) { return ""; } + return tr_it->second; +} + void MMCifInfo::AddAuthorsToCitation(StringRef id, std::vector<String> list) { // find citation diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh index 
68aa79befa08c11015178075d7daceea3d86c051..3eebf54b2369947c9d007e616c4d33c2f968750c 100644 --- a/modules/io/src/mol/mmcif_info.hh +++ b/modules/io/src/mol/mmcif_info.hh @@ -22,6 +22,7 @@ #include <vector> #include <map> #include <boost/shared_ptr.hpp> +#include <ost/seq/sequence_list.hh> #include <ost/geom/geom.hh> #include <ost/string_ref.hh> #include <ost/io/module_config.hh> @@ -783,6 +784,18 @@ public: /// \return chain name as used in the PDB file (auth_asym_id) String GetMMCifPDBChainTr(String cif) const; + /// \brief Add a new PDB/ mmCIF chain name tuple. + /// + /// \param pdb chain name as used by the PDB file (auth_asym_id) + /// \param cif chain name as used in the mmCIF file (label_asym_id) + void AddPDBMMCifChainTr(String pdb, String cif); + + /// \brief Get a CIF chain name for a PDB chain name + /// + /// \param pdb chain name as used by the PDB file (auth_asym_id) + /// \return chain name as used in the mmCIF file (label_asym_id) + String GetPDBMMCifChainTr(String pdb) const; + /// \brief Add a biounit /// /// \param bu biounit to be added @@ -861,6 +874,7 @@ private: std::vector<MMCifInfoTransOpPtr> transops_; MMCifInfoStructRefs struct_refs_; std::map<String, String> cif_2_pdb_chain_id_; + std::map<String, String> pdb_2_cif_chain_id_; }; diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 6f35d4a2f206286578d20339fff710a1d0d1a2eb..762939ca817800366ee487ffadef9da493f5103a 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -500,8 +500,6 @@ void MMCifReader::ParseAndAddAtom(const std::vector<StringRef>& columns) // store entity id chain_id_pairs_.push_back(std::pair<mol::ChainHandle,String>(curr_chain_, columns[indices_[LABEL_ENTITY_ID]].str())); - // store mmCIF - PDB chain name mapping - info_.AddMMCifPDBChainTr(cif_chain_name, auth_chain_name); } assert(curr_chain_.IsValid()); } else if (chain_id_pairs_.back().second != // unit test @@ -1541,6 +1539,7 @@ void 
MMCifReader::OnEndData() // process chain types std::vector<std::pair<mol::ChainHandle, String> >::const_iterator css; MMCifEntityDescMap::const_iterator edm_it; + String pdb_auth_chain_name; for (css = chain_id_pairs_.begin(); css != chain_id_pairs_.end(); ++css) { edm_it = entity_desc_map_.find(css->second); @@ -1550,6 +1549,9 @@ void MMCifReader::OnEndData() if (edm_it->second.seqres.length() > 0) { seqres_.AddSequence(seq::CreateSequence(css->first.GetName(), edm_it->second.seqres)); + pdb_auth_chain_name = css->first.GetStringProp("pdb_auth_chain_name"); + info_.AddMMCifPDBChainTr(css->first.GetName(), pdb_auth_chain_name); + info_.AddPDBMMCifChainTr(pdb_auth_chain_name, css->first.GetName()); } else if (edm_it->second.type!=mol::CHAINTYPE_WATER) { // mark everything that doesn't have SEQRES as ligand and isn't of type // water as ligand diff --git a/modules/io/tests/test_mmcif_info.cc b/modules/io/tests/test_mmcif_info.cc index d5a1ac1a465edbbe121d173b4167244adf804b27..65e4fdaecb42ec54fb50d3c579f24ba8b7ae8adb 100644 --- a/modules/io/tests/test_mmcif_info.cc +++ b/modules/io/tests/test_mmcif_info.cc @@ -208,6 +208,11 @@ BOOST_AUTO_TEST_CASE(mmcif_info) BOOST_CHECK("B" == info.GetMMCifPDBChainTr("A")); BOOST_CHECK("" == info.GetMMCifPDBChainTr("C")); + info.AddPDBMMCifChainTr("A", "B"); + BOOST_CHECK_THROW(info.AddPDBMMCifChainTr("A", "B"), IOException); + BOOST_CHECK("B" == info.GetPDBMMCifChainTr("A")); + BOOST_CHECK("" == info.GetPDBMMCifChainTr("C")); + BOOST_MESSAGE(" done."); } diff --git a/modules/io/tests/testfiles/mmcif/atom_site.mmcif b/modules/io/tests/testfiles/mmcif/atom_site.mmcif index ce890cf9d3be305019cbc431726433df8a06beee..68b2fbb5769c6e735ec7e30cb7311f002f9b3346 100644 --- a/modules/io/tests/testfiles/mmcif/atom_site.mmcif +++ b/modules/io/tests/testfiles/mmcif/atom_site.mmcif @@ -168,10 +168,10 @@ ATOM C CB ILE A 13 . 1 21.236 34.463 16.492 1.00 22.67 . 13 21 ? A ATOM C CG1 ILE A 13 . 1 20.478 33.469 17.371 1.00 22.14 . 13 22 ? 
A ATOM C CG2 ILE A 13 . 1 21.357 33.986 15.016 1.00 21.75 . 13 23 ? A # - - - - data truncated for brevity - - - - -HETATM C C1 APS C 1 1 1 4.171 29.012 7.116 0.58 17.27 1 300 101 ? A -HETATM C C2 APS C 1 1 1 4.949 27.758 6.793 0.58 16.95 1 300 102 ? A -HETATM O O3 APS C 1 1 1 4.800 26.678 7.393 0.58 16.85 1 300 103 ? A -HETATM N N4 APS C 1 1 1 5.930 27.841 5.869 0.58 16.43 1 300 104 ? A +HETATM C C1 APS C 1 1 1 4.171 29.012 7.116 0.58 17.27 1 300 101 ? C +HETATM C C2 APS C 1 1 1 4.949 27.758 6.793 0.58 16.95 1 300 102 ? C +HETATM O O3 APS C 1 1 1 4.800 26.678 7.393 0.58 16.85 1 300 103 ? C +HETATM N N4 APS C 1 1 1 5.930 27.841 5.869 0.58 16.43 1 300 104 ? C # - - - - data truncated for brevity - - - - # chain to be ignored by 'restrict_chains' feature ATOM N N ILE Z 1 . 1 23.664 33.855 16.884 1.00 22.08 . 1 17 ? Z