diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc
index cf5c490169643b809329691ae48047940ce9e1c9..dd43aa093bfbcf3431b576b7dc4db643cf0e394f 100644
--- a/modules/io/src/mol/mmcif_reader.cc
+++ b/modules/io/src/mol/mmcif_reader.cc
@@ -157,9 +157,15 @@ bool MMCifParser::OnBeginLoop(const StarLoopDesc& header)
     indices_[E_TYPE] = header.GetIndex("type");
     indices_[DETAILS] = header.GetIndex("details");
     return true;
-  }
-  /*else if (header.GetCategory()=="entity_poly") {
-  } else if (header.GetCategory()=="pdbx_poly_seq_scheme") {
+  } else if (header.GetCategory()=="entity_poly") {
+    category_ = ENTITY_POLY;
+    category_counts_[category_]++;
+    // mandatory
+    this->TryStoreIdx(ENTITY_ID, "entity_id", header);
+    // optional
+    indices_[EP_TYPE] = header.GetIndex("type");
+    return true;
+  } /*else if (header.GetCategory()=="pdbx_poly_seq_scheme") {
   } else if (header.GetCategory()=="pdbx_struct_assembly") {
   } else if (header.GetCategory()=="struct_conf") {
   }*/
@@ -467,6 +473,55 @@ void MMCifParser::ParseEntity(const std::vector<StringRef>& columns)
   }
 }
 
+void MMCifParser::ParseEntityPoly(const std::vector<StringRef>& columns)
+{
+  // relies on the 'entity' category having been parsed before 'entity_poly':
+  // look up the entity this row refers to; a miss is reported as an error
+  MMCifEntityDescMap::iterator edm_it =
+    entity_desc_map_.find(columns[indices_[ENTITY_ID]].str());
+
+  // store values in description map
+  if (edm_it != entity_desc_map_.end()) {
+    if (indices_[EP_TYPE] != -1) {
+      if(StringRef("polypeptide(D)", 14) == columns[indices_[EP_TYPE]]) {
+        edm_it->second.type = CHAINTYPE_POLY_PEPTIDE_D;
+      } else if(StringRef("polypeptide(L)", 14) == columns[indices_[EP_TYPE]]) {
+        edm_it->second.type = CHAINTYPE_POLY_PEPTIDE_L;
+      } else if(StringRef("polydeoxyribonucleotide", 23) ==
+                columns[indices_[EP_TYPE]]) {
+        edm_it->second.type = CHAINTYPE_POLY_DN;
+      } else if(StringRef("polyribonucleotide", 18) ==
+                columns[indices_[EP_TYPE]]) {
+        edm_it->second.type = CHAINTYPE_POLY_RN;
+      } else if(StringRef("polysaccharide(D)", 17) ==
+                columns[indices_[EP_TYPE]]) {
+        edm_it->second.type = CHAINTYPE_POLY_SAC_D;
+      } else if(StringRef("polysaccharide(L)", 17) ==
+                columns[indices_[EP_TYPE]]) {
+        edm_it->second.type = CHAINTYPE_POLY_SAC_L;
+      } else if(StringRef("polydeoxyribonucleotide/polyribonucleotide hybrid",
+                          49) == columns[indices_[EP_TYPE]]) {
+        edm_it->second.type = CHAINTYPE_POLY_DN_RN;
+      } else if(StringRef("other",
+                          5) == columns[indices_[EP_TYPE]]) {
+        edm_it->second.type = CHAINTYPE_UNKNOWN;
+      } else {
+        throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
+                                                 "Unrecognised polymer type '" +
+                                                 columns[indices_[EP_TYPE]].str() +
+                                                 "' found.",
+                                                 this->GetCurrentLinenum()));
+      }
+    }
+  } else {
+    throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
+                     "'entity_poly' category defined before 'entity' for id '" +
+                                             columns[indices_[ENTITY_ID]].str() +
+                                             "' or missing.",
+                                             this->GetCurrentLinenum()));
+  }
+}
+
 void MMCifParser::OnDataRow(const StarLoopDesc& header,
                             const std::vector<StringRef>& columns)
 {
@@ -478,6 +533,11 @@ void MMCifParser::OnDataRow(const StarLoopDesc& header,
   case ENTITY:
     LOG_TRACE("processing entity entry");
     this->ParseEntity(columns);
+    break;
+  case ENTITY_POLY:
+    LOG_TRACE("processing entity_poly entry");
+    this->ParseEntityPoly(columns);
+    break;
   default:
     return;
   }
diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh
index f720fadc3f887a9ab0097673783fc5d590829fd4..b63e5c51d5de8f25aa8c2fdf461cf8ff33f0c929 100644
--- a/modules/io/src/mol/mmcif_reader.hh
+++ b/modules/io/src/mol/mmcif_reader.hh
@@ -155,6 +155,11 @@ public:
   /// \param columns data row
   void ParseEntity(const std::vector<StringRef>& columns);
 
+  /// \brief Fetch MMCif entity_poly information (sets the entity's polymer type)
+  ///
+  /// \param columns data row
+  void ParseEntityPoly(const std::vector<StringRef>& columns);
+
 private:
   /// \enum magic numbers of this class
   typedef enum {
@@ -191,10 +196,17 @@ private:
     DETAILS ///< special aspects of the entity
   } EntityItems;
 
+  /// \enum items of the entity_poly category
+  typedef enum {
+    ENTITY_ID, ///< pointer to entity.id
+    EP_TYPE ///< type of polymer
+  } EntityPolyItems;
+
   /// \enum categories of the mmcif format
   typedef enum {
     ATOM_SITE,
     ENTITY,
+    ENTITY_POLY,
     DONT_KNOW
   } MMCifCategory;
 
diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc
index 45166d2e24c44e57c224b89d48300313791ab2f2..7134e2f9e472fc97c5c0dbbe9a30f18be81067b3 100644
--- a/modules/io/tests/test_mmcif_reader.cc
+++ b/modules/io/tests/test_mmcif_reader.cc
@@ -49,6 +49,7 @@ public:
   using MMCifParser::ParseAtomIdent;
   using MMCifParser::ParseAndAddAtom;
   using MMCifParser::ParseEntity;
+  using MMCifParser::ParseEntityPoly;
   using MMCifParser::TryStoreIdx;
 };
 
@@ -276,10 +277,10 @@ BOOST_AUTO_TEST_CASE(mmcif_entity_tests)
   mmcif_p.Parse();
   ch = eh.FindChain("A");
   BOOST_CHECK(ch.IsValid());
-  BOOST_CHECK(ch.GetType() == CHAINTYPE_POLY);
+  BOOST_CHECK(ch.GetType() == CHAINTYPE_POLY_PEPTIDE_L);
   ch = eh.FindChain("C");
   BOOST_CHECK(ch.IsValid());
-  BOOST_CHECK(ch.GetType() == CHAINTYPE_POLY);
+  BOOST_CHECK(ch.GetType() == CHAINTYPE_POLY_PEPTIDE_L);
   ch = eh.FindChain("O");
   BOOST_CHECK(ch.IsValid());
   BOOST_CHECK(ch.GetType() == CHAINTYPE_WATER);
@@ -315,6 +316,94 @@ BOOST_AUTO_TEST_CASE(mmcif_entity_tests)
   BOOST_MESSAGE(" done.");
 }
 
+BOOST_AUTO_TEST_CASE(mmcif_entity_poly_tests)
+{
+  BOOST_MESSAGE(" Running mmcif_entity_poly_tests...");
+  mol::ChainHandle ch;
+  IOProfile profile;
+  StarLoopDesc tmmcif_h;
+  // positive: every polymer type handled by ParseEntityPoly is accepted
+  // negative: missing entity entry, unknown polymer type
+  mol::EntityHandle eh = mol::CreateEntity();
+  MMCifParser mmcif_p("testfiles/mmcif/atom_site.mmcif", eh, profile);
+
+  mmcif_p.Parse();
+
+
+  BOOST_MESSAGE(" testing missing corresponding entity entry...");
+  {
+    mol::EntityHandle eh = mol::CreateEntity();
+    std::vector<StringRef> columns;
+    TestMMCifParserProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh);
+
+    tmmcif_h.SetCategory(StringRef("entity_poly", 11));
+    tmmcif_h.Add(StringRef("entity_id", 9));
+    tmmcif_p.OnBeginLoop(tmmcif_h);
+
+    columns.push_back(StringRef("1", 1));
+    BOOST_CHECK_THROW(tmmcif_p.ParseEntityPoly(columns), IOException);
+  }
+  BOOST_MESSAGE(" done.");
+  BOOST_MESSAGE(" testing type recognition...");
+  {
+    TestMMCifParserProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh);
+    std::vector<StringRef> columns;
+
+    // create corresponding entity entry
+    tmmcif_h.Clear();
+    tmmcif_h.SetCategory(StringRef("entity", 6));
+    tmmcif_h.Add(StringRef("id", 2));
+    tmmcif_h.Add(StringRef("type", 4));
+    tmmcif_p.OnBeginLoop(tmmcif_h);
+    columns.push_back(StringRef("1", 1));
+    columns.push_back(StringRef("polymer", 7));
+    tmmcif_p.ParseEntity(columns);
+    columns.pop_back();
+    columns.pop_back();
+
+    // build dummy entity_poly header
+    tmmcif_h.Clear();
+    tmmcif_h.SetCategory(StringRef("entity_poly", 11));
+    tmmcif_h.Add(StringRef("entity_id", 9));
+    tmmcif_h.Add(StringRef("type", 4));
+    tmmcif_p.OnBeginLoop(tmmcif_h);
+
+    columns.push_back(StringRef("1", 1));
+    columns.push_back(StringRef("polypeptide(D)", 14));
+    BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+    columns.pop_back();
+    columns.push_back(StringRef("polypeptide(L)", 14));
+    BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+    columns.pop_back();
+    columns.push_back(StringRef("polydeoxyribonucleotide", 23));
+    BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+    columns.pop_back();
+    columns.push_back(StringRef("polyribonucleotide", 18));
+    BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+    columns.pop_back();
+    columns.push_back(StringRef("polysaccharide(D)", 17));
+    BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+    columns.pop_back();
+    columns.push_back(StringRef("polysaccharide(L)", 17));
+    BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+    columns.pop_back();
+    columns.push_back(StringRef("polydeoxyribonucleotide/polyribonucleotide hybrid",
+                                49));
+    BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+    columns.pop_back();
+    columns.push_back(StringRef("other", 5));
+    BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+    columns.pop_back();
+    // keep entity id "1" in slot 0 so the throw comes from the unknown type
+    columns.push_back(StringRef("badbadprion", 11));
+    BOOST_CHECK_THROW(tmmcif_p.ParseEntityPoly(columns), IOException);
+    columns.pop_back();
+  }
+  BOOST_MESSAGE(" done.");
+
+  BOOST_MESSAGE(" done.");
+}
+
 BOOST_AUTO_TEST_CASE(mmcif_parseatomident)
 {
   BOOST_MESSAGE(" Running mmcif_parseatomident tests...");
diff --git a/modules/io/tests/testfiles/mmcif/atom_site.mmcif b/modules/io/tests/testfiles/mmcif/atom_site.mmcif
index cfee5fe4684527b81e3ebdf741232b0f7ae3b1fb..5987ec4614cd6f9bba79fdd94a10e0413bf848f1 100644
--- a/modules/io/tests/testfiles/mmcif/atom_site.mmcif
+++ b/modules/io/tests/testfiles/mmcif/atom_site.mmcif
@@ -15,6 +15,11 @@ _entity.details
 ;
 5 water .
 
+_entity_poly.entity_id 1
+_entity_poly.type 'polypeptide(L)'
+_entity_poly.nstd_linkage no
+_entity_poly.nstd_monomer no
+
 loop_
 _atom_site.group_PDB
 _atom_site.type_symbol