From 491c71b4e8a7290464f8c5f82015ae0b2f2a4fa1 Mon Sep 17 00:00:00 2001 From: Stefan Bienert <stefan.bienert@unibas.ch> Date: Fri, 20 Jul 2012 15:10:26 +0200 Subject: [PATCH] (cherry-pick) Added PDB chain name to Cif chain name map. Solved conflicts: modules/io/doc/mmcif.rst modules/io/pymod/export_mmcif_io.cc modules/io/src/mol/mmcif_info.cc modules/io/src/mol/mmcif_info.hh modules/io/tests/test_mmcif_info.cc --- modules/io/doc/mmcif.rst | 39 ++++++++++++++++++- modules/io/pymod/export_mmcif_io.cc | 4 ++ modules/io/src/mol/mmcif_info.cc | 36 +++++++++++++++++ modules/io/src/mol/mmcif_info.hh | 27 +++++++++++++ modules/io/src/mol/mmcif_reader.cc | 4 ++ modules/io/tests/test_mmcif_info.cc | 12 +++++- .../io/tests/testfiles/mmcif/atom_site.mmcif | 8 ++-- 7 files changed, 124 insertions(+), 6 deletions(-) diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index e172b34a3..38934cc79 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -160,6 +160,42 @@ of the annotation available. .. method:: GetStructDetails() + .. method:: AddMMCifPDBChainTr(cif_chain_id, pdb_chain_id) + + Set up a translation for a certain mmCIF chain name to the traditional PDB + chain name. + + :param cif_chain_id: atom_site.label_asym_id + :type cif_chain_id: :class:`str` + :param pdb_chain_id: atom_site.auth_asym_id + :type pdb_chain_id: :class:`str` + + .. method:: GetMMCifPDBChainTr(cif_chain_id) + + Get the translation of a certain mmCIF chain name to the traditional PDB + chain name. + + :param cif_chain_id: atom_site.label_asym_id + :type cif_chain_id: :class:`str` + :returns: atom_site.auth_asym_id as :class:`str` + + .. method:: AddPDBMMCifChainTr(pdb_chain_id, cif_chain_id) + + Set up a translation for a certain PDB chain name to the mmCIF chain name. + + :param pdb_chain_id: atom_site.auth_asym_id + :type pdb_chain_id: :class:`str` + :param cif_chain_id: atom_site.label_asym_id + :type cif_chain_id: :class:`str` + + .. 
method:: GetPDBMMCifChainTr(pdb_chain_id) + + Get the translation of a certain PDB chain name to the mmCIF chain name. + + :param pdb_chain_id: atom_site.auth_asym_id + :type pdb_chain_id: :class:`str` + :returns: atom_site.label_asym_id as :class:`str` + .. class:: MMCifInfoCitation This stores citation information from an input file. @@ -794,4 +830,5 @@ of the annotation available. .. LocalWords: cas isbn pubmed asu seqres conop ConnectAll casp COMPND OBSLTE .. LocalWords: SPRSDE pdb func autofunction exptl attr pdbx oper conf spr dif -.. LocalWords: biounits biounit uniprot UNP seqs +.. LocalWords: biounits biounit uniprot UNP seqs AddMMCifPDBChainTr cif asym +.. LocalWords: auth GetMMCifPDBChainTr AddPDBMMCifChainTr GetPDBMMCifChainTr diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc index 6654393be..18ec3713b 100644 --- a/modules/io/pymod/export_mmcif_io.cc +++ b/modules/io/pymod/export_mmcif_io.cc @@ -280,6 +280,10 @@ void export_mmcif_io() .def("GetStructDetails", &MMCifInfo::GetStructDetails) .def("SetObsoleteInfo", &MMCifInfo::SetObsoleteInfo) .def("GetObsoleteInfo", &MMCifInfo::GetObsoleteInfo) +.def("AddMMCifPDBChainTr", &MMCifInfo::AddMMCifPDBChainTr) + .def("GetMMCifPDBChainTr", &MMCifInfo::GetMMCifPDBChainTr) + .def("AddPDBMMCifChainTr", &MMCifInfo::AddPDBMMCifChainTr) + .def("GetPDBMMCifChainTr", &MMCifInfo::GetPDBMMCifChainTr) .add_property("citations", make_function(&MMCifInfo::GetCitations, return_value_policy<copy_const_reference>())) .add_property("biounits", make_function(&MMCifInfo::GetBioUnits, diff --git a/modules/io/src/mol/mmcif_info.cc b/modules/io/src/mol/mmcif_info.cc index c8565a2e8..29f0db746 100644 --- a/modules/io/src/mol/mmcif_info.cc +++ b/modules/io/src/mol/mmcif_info.cc @@ -22,6 +22,42 @@ namespace ost { namespace io { +void MMCifInfo::AddMMCifPDBChainTr(String cif, String pdb) +{ + std::map<String, String>::iterator tr_it = cif_2_pdb_chain_id_.find(cif); + if (tr_it != 
cif_2_pdb_chain_id_.end()) { + throw IOException("mmCIF chain id '"+ cif +"' is already mapped to '"+ + tr_it->second+"'."); + } + cif_2_pdb_chain_id_.insert(std::pair<String, String>(cif, pdb)); +} + +String MMCifInfo::GetMMCifPDBChainTr(String cif) const +{ + std::map<String, String>::const_iterator tr_it = + cif_2_pdb_chain_id_.find(cif); + if (tr_it == cif_2_pdb_chain_id_.end()) { return ""; } + return tr_it->second; +} + +void MMCifInfo::AddPDBMMCifChainTr(String pdb, String cif) +{ + std::map<String, String>::iterator tr_it = pdb_2_cif_chain_id_.find(pdb); + if (tr_it != pdb_2_cif_chain_id_.end()) { + throw IOException("PDB chain id '"+ pdb +"' is already mapped to '"+ + tr_it->second+"'."); + } + pdb_2_cif_chain_id_.insert(std::pair<String, String>(pdb, cif)); +} + +String MMCifInfo::GetPDBMMCifChainTr(String pdb) const +{ + std::map<String, String>::const_iterator tr_it = + pdb_2_cif_chain_id_.find(pdb); + if (tr_it == pdb_2_cif_chain_id_.end()) { return ""; } + return tr_it->second; +} + void MMCifInfo::AddAuthorsToCitation(StringRef id, std::vector<String> list) { // find citation diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh index b240e5a1c..3eebf54b2 100644 --- a/modules/io/src/mol/mmcif_info.hh +++ b/modules/io/src/mol/mmcif_info.hh @@ -22,6 +22,7 @@ #include <vector> #include <map> #include <boost/shared_ptr.hpp> +#include <ost/seq/sequence_list.hh> #include <ost/geom/geom.hh> #include <ost/string_ref.hh> #include <ost/io/module_config.hh> @@ -771,6 +772,30 @@ public: /// \return experiment resolution Real GetResolution() const { return resolution_; } + /// \brief Add a new mmCIF/ PDB chain name tuple. 
+ /// + /// \param cif chain name as used by the mmCIF file (label_asym_id) + /// \param pdb chain name as used in the PDB file (auth_asym_id) + void AddMMCifPDBChainTr(String cif, String pdb); + + /// \brief Get a PDB chain name for a CIF chain name + /// + /// \param cif chain name as used by the mmCIF file (label_asym_id) + /// \return chain name as used in the PDB file (auth_asym_id) + String GetMMCifPDBChainTr(String cif) const; + + /// \brief Add a new PDB/ mmCIF chain name tuple. + /// + /// \param pdb chain name as used by the PDB file (auth_asym_id) + /// \param cif chain name as used in the mmCIF file (label_asym_id) + void AddPDBMMCifChainTr(String pdb, String cif); + + /// \brief Get a CIF chain name for a PDB chain name + /// + /// \param pdb chain name as used by the PDB file (auth_asym_id) + /// \return chain name as used in the mmCIF file (label_asym_id) + String GetPDBMMCifChainTr(String pdb) const; + /// \brief Add a biounit /// /// \param bu biounit to be added @@ -848,6 +873,8 @@ private: std::vector<MMCifInfoBioUnit> biounits_; ///< list of biounits std::vector<MMCifInfoTransOpPtr> transops_; MMCifInfoStructRefs struct_refs_; + std::map<String, String> cif_2_pdb_chain_id_; + std::map<String, String> pdb_2_cif_chain_id_; }; diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 2bcdee810..762939ca8 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -1539,6 +1539,7 @@ void MMCifReader::OnEndData() // process chain types std::vector<std::pair<mol::ChainHandle, String> >::const_iterator css; MMCifEntityDescMap::const_iterator edm_it; + String pdb_auth_chain_name; for (css = chain_id_pairs_.begin(); css != chain_id_pairs_.end(); ++css) { edm_it = entity_desc_map_.find(css->second); @@ -1548,6 +1549,9 @@ void MMCifReader::OnEndData() if (edm_it->second.seqres.length() > 0) { seqres_.AddSequence(seq::CreateSequence(css->first.GetName(), edm_it->second.seqres)); + 
pdb_auth_chain_name = css->first.GetStringProp("pdb_auth_chain_name"); + info_.AddMMCifPDBChainTr(css->first.GetName(), pdb_auth_chain_name); + info_.AddPDBMMCifChainTr(pdb_auth_chain_name, css->first.GetName()); } else if (edm_it->second.type!=mol::CHAINTYPE_WATER) { // mark everything that doesn't have SEQRES as ligand and isn't of type // water as ligand diff --git a/modules/io/tests/test_mmcif_info.cc b/modules/io/tests/test_mmcif_info.cc index 3d2674797..65e4fdaec 100644 --- a/modules/io/tests/test_mmcif_info.cc +++ b/modules/io/tests/test_mmcif_info.cc @@ -202,7 +202,17 @@ BOOST_AUTO_TEST_CASE(mmcif_info) #else BOOST_CHECK_CLOSE(info.GetResolution(), 1.9f, 0.001f); #endif - + + info.AddMMCifPDBChainTr("A", "B"); + BOOST_CHECK_THROW(info.AddMMCifPDBChainTr("A", "B"), IOException); + BOOST_CHECK("B" == info.GetMMCifPDBChainTr("A")); + BOOST_CHECK("" == info.GetMMCifPDBChainTr("C")); + + info.AddPDBMMCifChainTr("A", "B"); + BOOST_CHECK_THROW(info.AddPDBMMCifChainTr("A", "B"), IOException); + BOOST_CHECK("B" == info.GetPDBMMCifChainTr("A")); + BOOST_CHECK("" == info.GetPDBMMCifChainTr("C")); + BOOST_MESSAGE(" done."); } diff --git a/modules/io/tests/testfiles/mmcif/atom_site.mmcif b/modules/io/tests/testfiles/mmcif/atom_site.mmcif index ce890cf9d..68b2fbb57 100644 --- a/modules/io/tests/testfiles/mmcif/atom_site.mmcif +++ b/modules/io/tests/testfiles/mmcif/atom_site.mmcif @@ -168,10 +168,10 @@ ATOM C CB ILE A 13 . 1 21.236 34.463 16.492 1.00 22.67 . 13 21 ? A ATOM C CG1 ILE A 13 . 1 20.478 33.469 17.371 1.00 22.14 . 13 22 ? A ATOM C CG2 ILE A 13 . 1 21.357 33.986 15.016 1.00 21.75 . 13 23 ? A # - - - - data truncated for brevity - - - - -HETATM C C1 APS C 1 1 1 4.171 29.012 7.116 0.58 17.27 1 300 101 ? A -HETATM C C2 APS C 1 1 1 4.949 27.758 6.793 0.58 16.95 1 300 102 ? A -HETATM O O3 APS C 1 1 1 4.800 26.678 7.393 0.58 16.85 1 300 103 ? A -HETATM N N4 APS C 1 1 1 5.930 27.841 5.869 0.58 16.43 1 300 104 ? 
A +HETATM C C1 APS C 1 1 1 4.171 29.012 7.116 0.58 17.27 1 300 101 ? C +HETATM C C2 APS C 1 1 1 4.949 27.758 6.793 0.58 16.95 1 300 102 ? C +HETATM O O3 APS C 1 1 1 4.800 26.678 7.393 0.58 16.85 1 300 103 ? C +HETATM N N4 APS C 1 1 1 5.930 27.841 5.869 0.58 16.43 1 300 104 ? C # - - - - data truncated for brevity - - - - # chain to be ignored by 'restrict_chains' feature ATOM N N ILE Z 1 . 1 23.664 33.855 16.884 1.00 22.08 . 1 17 ? Z -- GitLab