diff --git a/modules/io/src/mol/mmcif_info.cc b/modules/io/src/mol/mmcif_info.cc index 25c58ff9ced52ae52170239f132ccdc57c946674..dfa32874dec3b28ed93e6625b23534a7e4919ce7 100644 --- a/modules/io/src/mol/mmcif_info.cc +++ b/modules/io/src/mol/mmcif_info.cc @@ -195,4 +195,48 @@ MMCifInfoStructRefSeq::AddDif(int seq_rnum, const String& db_rnum, const String& return d; } +void MMCifInfo::AddEntityBranchLink(String chain_name, + mol::AtomHandle atom1, + mol::AtomHandle atom2) +{ + // check if element already exists + MMCifInfoEntityBranchMap::iterator blm_it = entity_branches_.find(chain_name); + if (blm_it == entity_branches_.end()) { + // `find` points to the end of the map so chain_name was not found + std::pair<MMCifInfoEntityBranchMap::iterator, bool> rit = + entity_branches_.insert(MMCifInfoEntityBranchMap::value_type(chain_name, + std::vector<MMCifInfoEntityBranch>())); + // let blm_it point to the new element so we can add to the vector + blm_it = rit.first; + } + // add new branch element + blm_it->second.push_back(MMCifInfoEntityBranch(atom1, atom2)); +} + +const std::vector<MMCifInfoEntityBranch> MMCifInfo::GetEntityBranchLinks() const +{ + std::vector<MMCifInfoEntityBranch> all_links; + MMCifInfoEntityBranchMap::const_iterator blm_it; + for (blm_it = entity_branches_.begin(); + blm_it != entity_branches_.end(); ++blm_it) { + std::copy(blm_it->second.begin(), blm_it->second.end(), + std::back_inserter(all_links)); + } + return all_links; +} + +void MMCifInfo::ConnectBranchLinks(mol::XCSEditor editor) +{ + MMCifInfoEntityBranchMap::iterator blm_it; + for (blm_it = entity_branches_.begin(); + blm_it != entity_branches_.end(); ++blm_it) { + for (std::vector<MMCifInfoEntityBranch>::iterator blv_it = + blm_it->second.begin(); + blv_it != blm_it->second.end(); + ++blv_it) { + editor.Connect(blv_it->GetAtom1(), blv_it->GetAtom2()); + } + } +} + }} //ns diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh index c152229ef838e7ce1d0abb4d39688112d3662def..dde4589923fc55d1b6197a4f7f83b4f5ee92af49 100644 --- a/modules/io/src/mol/mmcif_info.hh +++ b/modules/io/src/mol/mmcif_info.hh @@ -26,6 +26,7 @@ #include <ost/geom/geom.hh> #include <ost/string_ref.hh> #include <ost/io/module_config.hh> +#include <ost/mol/mol.hh> namespace ost { namespace io { @@ -918,6 +919,21 @@ private: String details_; }; +/// \brief Store information on brnached structures (oligosaccharides) +/// +class DLLEXPORT_OST_IO MMCifInfoEntityBranch { +public: + MMCifInfoEntityBranch(mol::AtomHandle atom1, mol::AtomHandle atom2): + atom1_(atom1), atom2_(atom2) {} + mol::AtomHandle GetAtom1() const { return atom1_;} + mol::AtomHandle GetAtom2() const { return atom2_; } + +private: + mol::AtomHandle atom1_; + mol::AtomHandle atom2_; +}; +typedef std::map<String, std::vector<MMCifInfoEntityBranch> > MMCifInfoEntityBranchMap; + /// \brief container class for additional information from MMCif files /// /// \section mmcif annotation information @@ -1126,6 +1142,26 @@ public: { return revisions_; } + + /// \brief Add bond information for a branched entity + /// + /// \param chain_name chain the bond belongs to + /// \param atom1 first atom of the bond + /// \param atom2 second atom of the bond + void AddEntityBranchLink(String chain_name, + mol::AtomHandle atom1, + mol::AtomHandle atom2); + + /// \brief Get all links for all branched entities + /// + const std::vector<MMCifInfoEntityBranch> GetEntityBranchLinks() const; + //GetEntityBranchChains + //GetEntityBranchByChain + + /// \brief Connect all atoms listed as links for branched entities. + /// + void ConnectBranchLinks(mol::XCSEditor editor); + //protected: private: @@ -1140,10 +1176,11 @@ private: std::vector<MMCifInfoCitation> citations_; ///< list of citations std::vector<MMCifInfoBioUnit> biounits_; ///< list of biounits std::vector<MMCifInfoTransOpPtr> transops_; - MMCifInfoStructRefs struct_refs_; + MMCifInfoStructRefs struct_refs_; std::map<String, String> cif_2_pdb_chain_id_; std::map<String, String> pdb_2_cif_chain_id_; std::map<String, String> cif_2_entity_id_; + std::map<String, std::vector<MMCifInfoEntityBranch> > entity_branches_; }; diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 1b7f9a00382ad94035d2883a7bab0e921fe8c77e..8eeb71924f49d0a30a03325ca9b493513f129cf0 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -90,6 +90,7 @@ void MMCifReader::ClearState() revisions_.clear(); revision_types_.clear(); database_PDB_rev_added_ = false; + entity_branch_link_map_.clear(); } void MMCifReader::SetRestrictChains(const String& restrict_chains) @@ -368,6 +369,23 @@ bool MMCifReader::OnBeginLoop(const StarLoopDesc& header) this->TryStoreIdx(BR_ENTITY_ID, "entity_id", header); this->TryStoreIdx(BR_ENTITY_TYPE, "type", header); cat_available = true; + } else if (header.GetCategory() == "pdbx_entity_branch_link") { + category_ = PDBX_ENTITY_BRANCH_LINK; + // mandatory + this->TryStoreIdx(BL_ENTITY_ID, "entity_id", header); + this->TryStoreIdx(BL_ATOM_ID_1, "atom_id_1", header); + this->TryStoreIdx(BL_ATOM_ID_2, "atom_id_2", header); + this->TryStoreIdx(BL_COMP_ID_1, "comp_id_1", header); + this->TryStoreIdx(BL_COMP_ID_2, "comp_id_2", header); + this->TryStoreIdx(BL_ENTITY_BRANCH_LIST_NUM_1, "entity_branch_list_num_1", + header); + this->TryStoreIdx(BL_ENTITY_BRANCH_LIST_NUM_2, "entity_branch_list_num_2", + header); + // optional items + indices_[BL_ATOM_STEREO_CONFIG_1] = header.GetIndex("atom_stereo_config_1"); + indices_[BL_ATOM_STEREO_CONFIG_2] = header.GetIndex("atom_stereo_config_2"); + indices_[BL_VALUE_ORDER] = header.GetIndex("value_order"); + cat_available = true; } category_counts_[category_]++; return cat_available; @@ -1551,6 +1569,10 @@ void MMCifReader::OnDataRow(const StarLoopDesc& header, LOG_TRACE("processing pdbx_entity_branch entry"); this->ParsePdbxEntityBranch(columns); break; + case PDBX_ENTITY_BRANCH_LINK: + LOG_TRACE("processing pdbx_entity_branch_link entry"); + this->ParsePdbxEntityBranchLink(columns); + break; default: throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, "Uncatched category '"+ header.GetCategory() +"' found.", @@ -1711,6 +1733,52 @@ void MMCifReader::ParsePdbxEntityBranch(const std::vector<StringRef>& columns) } } +void MMCifReader::ParsePdbxEntityBranchLink(const std::vector<StringRef>& columns) +{ + MMCifPdbxEntityBranchLink link_pair; + + String entity_id(columns[indices_[BL_ENTITY_ID]].str()); + + // list of entities -> pairs of info for link + link_pair.res_num_1 = + this->TryGetInt(columns[indices_[BL_ENTITY_BRANCH_LIST_NUM_1]], + "pdbx_entity_branch_link.entity_branch_list_num_1"); + link_pair.cmp_1 = columns[indices_[BL_COMP_ID_1]].str(); + link_pair.atm_nm_1 = columns[indices_[BL_ATOM_ID_1]].str(); + link_pair.res_num_2 = + this->TryGetInt(columns[indices_[BL_ENTITY_BRANCH_LIST_NUM_2]], + "pdbx_entity_branch_link.entity_branch_list_num_2"); + link_pair.cmp_2 = columns[indices_[BL_COMP_ID_2]].str(); + link_pair.atm_nm_2 = columns[indices_[BL_ATOM_ID_2]].str(); + + /*if (indices_[BL_ATOM_STEREO_CONFIG_1] != -1) { + char A = *columns[indices_[BL_ATOM_STEREO_CONFIG_1]].begin(); + }*/ + // check stereo values to be N S R + /*if (indices_[BL_ATOM_STEREO_CONFIG_2] != -1) { + }*/ + // check value order to be ... git grep bond_order + /*if (indices_[BL_VALUE_ORDER] != -1) { + }*/ + + std::pair<MMCifPdbxEntityBranchLinkMap::iterator, bool> rit; + + // check if element already exists + MMCifPdbxEntityBranchLinkMap::iterator blm_it = + entity_branch_link_map_.find(entity_id); + + // if the entity was not seen before, create it in the map + if (blm_it == entity_branch_link_map_.end()) { + rit = entity_branch_link_map_.insert( + MMCifPdbxEntityBranchLinkMap::value_type(entity_id, + std::vector<MMCifPdbxEntityBranchLink>())); + blm_it = rit.first; + } + + // add the link pair + blm_it->second.push_back(link_pair); +} + void MMCifReader::OnEndData() { mol::XCSEditor editor=ent_handle_.EditXCS(mol::BUFFERED_EDIT); @@ -1718,10 +1786,12 @@ void MMCifReader::OnEndData() // process chain types std::vector<std::pair<mol::ChainHandle, String> >::const_iterator css; MMCifEntityDescMap::const_iterator edm_it; + MMCifPdbxEntityBranchLinkMap::const_iterator blm_it; + std::vector<MMCifPdbxEntityBranchLink>::const_iterator bl_it; String pdb_auth_chain_name; for (css = chain_id_pairs_.begin(); css != chain_id_pairs_.end(); ++css) { + // chain description edm_it = entity_desc_map_.find(css->second); - if (edm_it != entity_desc_map_.end()) { editor.SetChainType(css->first, edm_it->second.type); editor.SetChainDescription(css->first, edm_it->second.details); @@ -1745,6 +1815,21 @@ void MMCifReader::OnEndData() LOG_WARNING("No entity description found for atom_site.label_entity_id '" << css->second << "'"); } + // find + blm_it = entity_branch_link_map_.find(css->second); + // store linker pair + if (blm_it != entity_branch_link_map_.end()) { + for (bl_it = blm_it->second.begin(); bl_it != blm_it->second.end(); + ++bl_it) { + mol::ResidueHandle res1 = css->first.FindResidue(to_res_num( + bl_it->res_num_1, ' ')); + mol::ResidueHandle res2 = css->first.FindResidue(to_res_num( + bl_it->res_num_2, ' ')); + info_.AddEntityBranchLink(css->first.GetName(), + res1.FindAtom(bl_it->atm_nm_1), + res2.FindAtom(bl_it->atm_nm_2)); + } + } } // process citations (couple with authors diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index 5e1e2204a9da08e6bdfbdd920f5fd1d8af7ccbc9..8d975600c4737a8e36e55277ec8e04c148817098 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -57,6 +57,7 @@ namespace ost { namespace io { /// \li pdbx_database_PDB_obs_spr /// \li database_PDB_rev /// \li pdbx_entity_branch +/// \li pdbx_entity_branch_link class DLLEXPORT_OST_IO MMCifReader : public StarParser { public: /// \brief create a MMCifReader @@ -332,6 +333,11 @@ protected: /// \param columns data row void ParsePdbxEntityBranch(const std::vector<StringRef>& columns); + /// \brief Fetch mmCIF pdbx_entity_branch_link information + /// + /// \param columns data row + void ParsePdbxEntityBranchLink(const std::vector<StringRef>& columns); + /// \struct types of secondary structure typedef enum { MMCIF_HELIX, @@ -573,6 +579,20 @@ private: BR_ENTITY_TYPE ///< type of branched molecular entity } EntityBranchItems; + /// \enum items of the pdbx_entity_branch_link category + typedef enum { + BL_ENTITY_ID, /// < pointer to entity.id + BL_ATOM_ID_1, /// < atom identifier (element + number) + BL_ATOM_ID_2, /// < atom identifier (element + number) + BL_COMP_ID_1, /// < tlc of component + BL_COMP_ID_2, /// < tlc of component + BL_ENTITY_BRANCH_LIST_NUM_1, /// < res. no. (pdbx_entity_branch_list.num) + BL_ENTITY_BRANCH_LIST_NUM_2, /// < res. no. (pdbx_entity_branch_list.num) + BL_ATOM_STEREO_CONFIG_1, /// < chiral configuration (R/ S) + BL_ATOM_STEREO_CONFIG_2, /// < chiral configuration (R/ S) + BL_VALUE_ORDER /// < bond order + } EntityBranchLinkItems; + /// \enum categories of the mmcif format typedef enum { ATOM_SITE, @@ -597,6 +617,7 @@ private: PDBX_AUDIT_REVISION_DETAILS, PDBX_DATABASE_STATUS, PDBX_ENTITY_BRANCH, + PDBX_ENTITY_BRANCH_LINK, DONT_KNOW } MMCifCategory; @@ -651,6 +672,18 @@ private: int minor; ///< minor version }; + /// \struct to keep entity_branch linker pairs while parsing + typedef struct { + int res_num_1; ///< index of the linked residue relative to its chain + String cmp_1; ///< tlc of residue (sugar) + String atm_nm_1; ///< (IUPAC) name of the linking atom + int res_num_2; ///< index of the linked residue relative to its chain + String cmp_2; ///< tlc of residue (sugar) + String atm_nm_2; ///< index of the linked residue relative to its chain + } MMCifPdbxEntityBranchLink; + typedef std::map<String, std::vector<MMCifPdbxEntityBranchLink> > + MMCifPdbxEntityBranchLinkMap; + // members MMCifCategory category_; int category_counts_[DONT_KNOW+1]; ///< overall no. of atom_site loops @@ -686,6 +719,8 @@ private: std::vector<MMCifRevisionDesc> revisions_; std::map<int, String> revision_types_; bool database_PDB_rev_added_; + // for entity_branch connections + MMCifPdbxEntityBranchLinkMap entity_branch_link_map_; }; }} diff --git a/modules/io/tests/test_mmcif_info.cc b/modules/io/tests/test_mmcif_info.cc index efb35d54ff84a5772b7193ba0a0c2382880585c2..25fccecea2f033714d51253ae3cfbed453173b69 100644 --- a/modules/io/tests/test_mmcif_info.cc +++ b/modules/io/tests/test_mmcif_info.cc @@ -22,6 +22,7 @@ #include <ost/io/io_exception.hh> #include <ost/io/mol/mmcif_info.hh> +#include <ost/mol/mol.hh> using namespace ost; using namespace ost::io; @@ -278,6 +279,27 @@ BOOST_AUTO_TEST_CASE(mmcif_info_revisions) BOOST_TEST_MESSAGE(" done."); } +BOOST_AUTO_TEST_CASE(mmcif_info_branch) +{ + BOOST_TEST_MESSAGE(" Running mmcif_info_branch tests..."); + + // create a dummy entity to start an editor... + mol::EntityHandle eh = mol::CreateEntity(); + mol::XCSEditor editor = eh.EditXCS(); + mol::ChainHandle ch = editor.InsertChain("A"); + mol::ResidueHandle res1 = editor.AppendResidue(ch, "NAG"); + mol::ResidueHandle res2 = editor.AppendResidue(ch, "NAG"); + // create AtomHandles for testing + mol::AtomHandle atom1 = editor.InsertAtom(res2, "C1",geom::Vec3()); + mol::AtomHandle atom2 = editor.InsertAtom(res1, "O4",geom::Vec3()); + + MMCifInfoEntityBranch branch1(atom1, atom2); + BOOST_CHECK(branch1.GetAtom1().GetQualifiedName() == "A.NAG2.C1"); + BOOST_CHECK(branch1.GetAtom2().GetQualifiedName() == "A.NAG1.O4"); + + BOOST_TEST_MESSAGE(" done."); +} + BOOST_AUTO_TEST_CASE(mmcif_info) { BOOST_TEST_MESSAGE(" Running mmcif_info tests..."); @@ -311,6 +333,37 @@ BOOST_AUTO_TEST_CASE(mmcif_info) BOOST_CHECK(info.GetRevisions().GetSize() == 0); + // simple check that we can add branch links + mol::EntityHandle eh = mol::CreateEntity(); + mol::XCSEditor editor = eh.EditXCS(); + mol::ChainHandle ch1 = editor.InsertChain("A"); + mol::ResidueHandle res11 = editor.AppendResidue(ch1, "NAG"); + mol::ResidueHandle res12 = editor.AppendResidue(ch1, "NAG"); + // create AtomHandles for testing + mol::AtomHandle atom11 = editor.InsertAtom(res12, "C1",geom::Vec3()); + mol::AtomHandle atom12 = editor.InsertAtom(res11, "O4",geom::Vec3()); + mol::ChainHandle ch2 = editor.InsertChain("B"); + mol::ResidueHandle res21 = editor.AppendResidue(ch2, "BMA"); + mol::ResidueHandle res22 = editor.AppendResidue(ch2, "MAN"); + // create AtomHandles for testing + mol::AtomHandle atom21 = editor.InsertAtom(res22, "C1",geom::Vec3()); + mol::AtomHandle atom22 = editor.InsertAtom(res21, "O3",geom::Vec3()); + info.AddEntityBranchLink(ch1.GetName(), atom11, atom12); + info.AddEntityBranchLink(ch2.GetName(), atom21, atom22); + std::vector<MMCifInfoEntityBranch> blinks = info.GetEntityBranchLinks(); + + BOOST_CHECK(blinks.size() == 2); + BOOST_CHECK(blinks[0].GetAtom1().GetQualifiedName() == "A.NAG2.C1"); + BOOST_CHECK(blinks[0].GetAtom2().GetQualifiedName() == "A.NAG1.O4"); + BOOST_CHECK(blinks[1].GetAtom1().GetQualifiedName() == "B.MAN2.C1"); + BOOST_CHECK(blinks[1].GetAtom2().GetQualifiedName() == "B.BMA1.O3"); + + // check that branch links get bonds + info.ConnectBranchLinks(editor); + + BOOST_CHECK(atom11.GetBondPartners()[0] == atom12); + BOOST_CHECK(atom22.GetBondPartners()[0] == atom21); + BOOST_TEST_MESSAGE(" done."); }