diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc
index 6392ef4b11cc5f3eaff1fbd23536f0ffe03586b5..1b7f9a00382ad94035d2883a7bab0e921fe8c77e 100644
--- a/modules/io/src/mol/mmcif_reader.cc
+++ b/modules/io/src/mol/mmcif_reader.cc
@@ -362,7 +362,13 @@ bool MMCifReader::OnBeginLoop(const StarLoopDesc& header)
     indices_[PDS_RECVD_INITIAL_DEPOSITION_DATE]
       = header.GetIndex("recvd_initial_deposition_date");
     cat_available = true;
-  }
+  } else if (header.GetCategory() == "pdbx_entity_branch") {
+    category_ = PDBX_ENTITY_BRANCH;
+    // mandatory
+    this->TryStoreIdx(BR_ENTITY_ID, "entity_id", header);
+    this->TryStoreIdx(BR_ENTITY_TYPE, "type", header);
+    cat_available = true;
+  }
   category_counts_[category_]++;
   return cat_available;
 }
@@ -1541,6 +1547,10 @@ void MMCifReader::OnDataRow(const StarLoopDesc& header,
     LOG_TRACE("processing pdbx_database_status entry");
     this->ParsePdbxDatabaseStatus(columns);
     break;
+  case PDBX_ENTITY_BRANCH:
+    LOG_TRACE("processing pdbx_entity_branch entry");
+    this->ParsePdbxEntityBranch(columns);
+    break;
   default:
     throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
                        "Uncatched category '"+ header.GetCategory() +"' found.",
@@ -1679,6 +1689,28 @@ void MMCifReader::ParseStructRefSeqDif(const std::vector<StringRef>& columns)
   }
 }
 
+void MMCifReader::ParsePdbxEntityBranch(const std::vector<StringRef>& columns)
+{
+  // The 'entity' category is assumed to ALWAYS precede 'pdbx_entity_branch',
+  // so the entity referenced by this row should already be stored in
+  // entity_desc_map_. Look that entity up by its id.
+  MMCifEntityDescMap::iterator edm_it =
+    entity_desc_map_.find(columns[indices_[BR_ENTITY_ID]].str());
+
+  if (edm_it == entity_desc_map_.end()) {
+    throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
+      "'pdbx_entity_branch' category defined before 'entity' for id '" +
+                                        columns[indices_[BR_ENTITY_ID]].str() +
+                                             "' or missing.",
+                                             this->GetCurrentLinenum()));
+  }
+
+  // store type as given by the pdbx_entity_branch 'type' item
+  if (indices_[BR_ENTITY_TYPE] != -1) {
+    edm_it->second.type = mol::ChainTypeFromString(columns[indices_[BR_ENTITY_TYPE]]);
+  }
+}
+
 void MMCifReader::OnEndData()
 {
   mol::XCSEditor editor=ent_handle_.EditXCS(mol::BUFFERED_EDIT);
diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh
index 9147a234c4f4966b4ac861f45c48f0a2e7bee829..5e1e2204a9da08e6bdfbdd920f5fd1d8af7ccbc9 100644
--- a/modules/io/src/mol/mmcif_reader.hh
+++ b/modules/io/src/mol/mmcif_reader.hh
@@ -56,6 +56,7 @@ namespace ost { namespace io {
 /// \li struct_sheet_range
 /// \li pdbx_database_PDB_obs_spr
 /// \li database_PDB_rev
+/// \li pdbx_entity_branch
 class DLLEXPORT_OST_IO MMCifReader : public StarParser {
 public:
   /// \brief create a MMCifReader
@@ -326,6 +327,11 @@ protected:
   /// \param columns data row
   void ParsePdbxDatabasePdbObsSpr(const std::vector<StringRef>& columns);
 
+  /// \brief Fetch mmCIF pdbx_entity_branch information
+  ///
+  /// \param columns data row
+  void ParsePdbxEntityBranch(const std::vector<StringRef>& columns);
+
   /// \struct types of secondary structure
   typedef enum {
     MMCIF_HELIX,
@@ -561,6 +567,12 @@ private:
     PDS_RECVD_INITIAL_DEPOSITION_DATE, ///< date of initial deposition
   } PdbxDatabaseStatusItems;
 
+  /// \enum items of the pdbx_entity_branch category (counterpart of entity_poly)
+  typedef enum {
+    BR_ENTITY_ID,                      ///< pointer to entity.id
+    BR_ENTITY_TYPE                     ///< type of branched molecular entity
+  } EntityBranchItems;
+
   /// \enum categories of the mmcif format
   typedef enum {
     ATOM_SITE,
@@ -584,6 +596,7 @@ private:
     PDBX_AUDIT_REVISION_HISTORY,
     PDBX_AUDIT_REVISION_DETAILS,
     PDBX_DATABASE_STATUS,
+    PDBX_ENTITY_BRANCH,
     DONT_KNOW
   } MMCifCategory;
 
diff --git a/modules/mol/base/doc/entity.rst b/modules/mol/base/doc/entity.rst
index ffb82993ce5c368bad66610b2567f7b48d67a390..3a4d0d292748f1c8f7546468d8f8b4657c9d10c2 100644
--- a/modules/mol/base/doc/entity.rst
+++ b/modules/mol/base/doc/entity.rst
@@ -1972,7 +1972,8 @@ here.
   ``CHAINTYPE_POLY_SAC_L``, ``CHAINTYPE_POLY_DN_RN``,
   ``CHAINTYPE_UNKNOWN``, ``CHAINTYPE_MACROLIDE``,
   ``CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE``, ``CHAINTYPE_POLY_PEPTIDE_DN_RN``,
-  ``CHAINTYPE_BRANCHED``, ``CHAINTYPE_N_CHAINTYPES``
+  ``CHAINTYPE_BRANCHED``, ``CHAINTYPE_OLIGOSACCHARIDE``,
+  ``CHAINTYPE_N_CHAINTYPES``
 
   Where ``CHAINTYPE_N_CHAINTYPES`` holds the number of different types available.
 
diff --git a/modules/mol/base/pymod/export_chain.cc b/modules/mol/base/pymod/export_chain.cc
index e81d6e1594171011792023f07ab4b59813d31332..1664c13a33ed03c1fbdc404bbac9b081522c8e4a 100644
--- a/modules/mol/base/pymod/export_chain.cc
+++ b/modules/mol/base/pymod/export_chain.cc
@@ -138,6 +138,7 @@ void export_Chain()
       .value("CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE", CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE)
       .value("CHAINTYPE_POLY_PEPTIDE_DN_RN", CHAINTYPE_POLY_PEPTIDE_DN_RN)
       .value("CHAINTYPE_BRANCHED", CHAINTYPE_BRANCHED)
+      .value("CHAINTYPE_OLIGOSACCHARIDE", CHAINTYPE_OLIGOSACCHARIDE)
       .value("CHAINTYPE_N_CHAINTYPES", CHAINTYPE_N_CHAINTYPES)
       .export_values()
     ;
diff --git a/modules/mol/base/src/chain_type.cc b/modules/mol/base/src/chain_type.cc
index 5b2fb1350466b127391dc55d266adbea9713361c..6694aa017ab0c7cf89ba6e8d5cce6d2b495940f2 100644
--- a/modules/mol/base/src/chain_type.cc
+++ b/modules/mol/base/src/chain_type.cc
@@ -34,6 +34,8 @@ ChainType ChainTypeFromString(StringRef identifier)
     return CHAINTYPE_WATER;
   } else if (StringRef("macrolide", 9) == identifier) {
     return CHAINTYPE_MACROLIDE;
+  } else if (StringRef("branched", 8) == identifier) {
+    return CHAINTYPE_BRANCHED;
   // chain types as found in the entity_poly category of a mmcif file
   } else if (StringRef("polypeptide(D)", 14) == identifier) {
     return CHAINTYPE_POLY_PEPTIDE_D;
@@ -54,8 +56,8 @@ ChainType ChainTypeFromString(StringRef identifier)
     return CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE;
   } else if (StringRef("peptide nucleic acid", 20) == identifier) {
     return CHAINTYPE_POLY_PEPTIDE_DN_RN;
-  } else if (StringRef("branched", 8) == identifier) {
-    return CHAINTYPE_BRANCHED;
+  } else if (StringRef("oligosaccharide", 15) == identifier) {
+    return CHAINTYPE_OLIGOSACCHARIDE;
   } else if (StringRef("other", 5) == identifier) {
     return CHAINTYPE_UNKNOWN;
   }
@@ -81,6 +83,8 @@ String StringFromChainType(ChainType type)
     return "water";
   } else if (CHAINTYPE_MACROLIDE == type) {
     return "macrolide";
+  } else if (CHAINTYPE_BRANCHED == type) {
+    return "branched";
   // chain types as found in the entity_poly category of a mmcif file
   } else if (CHAINTYPE_POLY_PEPTIDE_D == type) {
     return "polypeptide(D)";
@@ -100,8 +104,8 @@ String StringFromChainType(ChainType type)
     return "cyclic-pseudo-peptide";
   } else if (CHAINTYPE_POLY_PEPTIDE_DN_RN == type) {
     return "peptide nucleic acid";
-  } else if (CHAINTYPE_BRANCHED == type) {
-    return "branched";
+  } else if (CHAINTYPE_OLIGOSACCHARIDE == type) {
+    return "oligosaccharide";
   } else if (CHAINTYPE_UNKNOWN == type) {
     return "other";
   }
diff --git a/modules/mol/base/src/chain_type.hh b/modules/mol/base/src/chain_type.hh
index bfc664758b2c2f1c863aa6402bf9322f0ffbf8b4..08ce4dcc6462a726a367eaf34708b789d684dcc0 100644
--- a/modules/mol/base/src/chain_type.hh
+++ b/modules/mol/base/src/chain_type.hh
@@ -45,6 +45,7 @@ typedef enum {
   CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE, ///< cyclic-pseudo-peptide
   CHAINTYPE_POLY_PEPTIDE_DN_RN,    ///< peptide nucleic acid
   CHAINTYPE_BRANCHED,              ///< carbohydrate
+  CHAINTYPE_OLIGOSACCHARIDE,       ///< oligosaccharide (branched carbohydrate)
   CHAINTYPE_N_CHAINTYPES           ///< no. of chain types
 } ChainType;
 
diff --git a/modules/mol/base/tests/test_chain.cc b/modules/mol/base/tests/test_chain.cc
index 15838c9f0cf9223ba90504c889893ba911bd5765..4c2127670e1a65b67b0230a8a629809462e8712a 100644
--- a/modules/mol/base/tests/test_chain.cc
+++ b/modules/mol/base/tests/test_chain.cc
@@ -319,13 +319,18 @@ BOOST_AUTO_TEST_CASE(chain_type)
   BOOST_CHECK(!ch1.IsPolysaccharide());
   BOOST_CHECK(!ch1.IsPolypeptide());
   BOOST_CHECK(!ch1.IsPolynucleotide());
-
   e.SetChainType(ch1, CHAINTYPE_BRANCHED);
   BOOST_CHECK(ch1.GetType() == CHAINTYPE_BRANCHED);
   BOOST_CHECK(!ch1.IsPolymer());
   BOOST_CHECK(!ch1.IsPolysaccharide());
   BOOST_CHECK(!ch1.IsPolypeptide());
   BOOST_CHECK(!ch1.IsPolynucleotide());
+  e.SetChainType(ch1, CHAINTYPE_OLIGOSACCHARIDE);
+  BOOST_CHECK(ch1.GetType() == CHAINTYPE_OLIGOSACCHARIDE);
+  BOOST_CHECK(!ch1.IsPolymer());
+  BOOST_CHECK(!ch1.IsPolysaccharide());
+  BOOST_CHECK(!ch1.IsPolypeptide());
+  BOOST_CHECK(!ch1.IsPolynucleotide());
 
   // string -> chain type
   BOOST_CHECK(ChainTypeFromString("polymer") == CHAINTYPE_POLY);
@@ -353,6 +358,8 @@ BOOST_AUTO_TEST_CASE(chain_type)
   BOOST_CHECK(ChainTypeFromString("peptide nucleic acid") ==
               CHAINTYPE_POLY_PEPTIDE_DN_RN);
   BOOST_CHECK(ChainTypeFromString("branched") == CHAINTYPE_BRANCHED);
+  BOOST_CHECK(ChainTypeFromString("oligosaccharide") ==
+              CHAINTYPE_OLIGOSACCHARIDE);
   BOOST_CHECK_THROW(ChainTypeFromString("supposed to fail"),
                     Error);
 
@@ -380,6 +387,8 @@ BOOST_AUTO_TEST_CASE(chain_type)
   BOOST_CHECK(StringFromChainType(CHAINTYPE_POLY_PEPTIDE_DN_RN) ==
               "peptide nucleic acid");
   BOOST_CHECK(StringFromChainType(CHAINTYPE_BRANCHED) == "branched");
+  BOOST_CHECK(StringFromChainType(CHAINTYPE_OLIGOSACCHARIDE) ==
+              "oligosaccharide");
   BOOST_CHECK_THROW(StringFromChainType(CHAINTYPE_N_CHAINTYPES),
                     Error);
 }