diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc
index 0c15c3767167c9245acd645b4b7ceff2d1991640..91c35928c21c39ee486e91e3b4d7818cafa2eb00 100644
--- a/modules/io/src/mol/mmcif_reader.cc
+++ b/modules/io/src/mol/mmcif_reader.cc
@@ -1023,6 +1023,77 @@ void MMCifParser::ParseStruct(const std::vector<StringRef>& columns)
   info_.SetStructDetails(details);
 }
 
+MMCifParser::MMCifSecStructElement MMCifParser::DetermineSecStructType(
+                                                    const StringRef& type) const
+{
+  // Every type identifier this function recognises, stored with its
+  // explicit length and the element class it is reported as.
+  static const struct {
+    const char* id;
+    size_t len;
+    MMCifSecStructElement element;
+  } known_types[] = {
+    // helices
+    {"HELX_P", 6, MMCIF_HELIX},
+    {"HELX_OT_P", 9, MMCIF_HELIX},
+    {"HELX_RH_P", 9, MMCIF_HELIX},
+    {"HELX_RH_OT_P", 12, MMCIF_HELIX},
+    {"HELX_RH_AL_P", 12, MMCIF_HELIX},
+    {"HELX_RH_GA_P", 12, MMCIF_HELIX},
+    {"HELX_RH_OM_P", 12, MMCIF_HELIX},
+    {"HELX_RH_PI_P", 12, MMCIF_HELIX},
+    {"HELX_RH_27_P", 12, MMCIF_HELIX},
+    {"HELX_RH_3T_P", 12, MMCIF_HELIX},
+    {"HELX_RH_PP_P", 12, MMCIF_HELIX},
+    {"HELX_LH_P", 9, MMCIF_HELIX},
+    {"HELX_LH_OT_P", 12, MMCIF_HELIX},
+    {"HELX_LH_AL_P", 12, MMCIF_HELIX},
+    {"HELX_LH_GA_P", 12, MMCIF_HELIX},
+    {"HELX_LH_OM_P", 12, MMCIF_HELIX},
+    {"HELX_LH_PI_P", 12, MMCIF_HELIX},
+    {"HELX_LH_27_P", 12, MMCIF_HELIX},
+    {"HELX_LH_3T_P", 12, MMCIF_HELIX},
+    {"HELX_LH_PP_P", 12, MMCIF_HELIX},
+    {"HELX_N", 6, MMCIF_HELIX},
+    {"HELX_OT_N", 9, MMCIF_HELIX},
+    {"HELX_RH_N", 9, MMCIF_HELIX},
+    {"HELX_RH_OT_N", 12, MMCIF_HELIX},
+    {"HELX_RH_A_N", 11, MMCIF_HELIX},
+    {"HELX_RH_B_N", 11, MMCIF_HELIX},
+    {"HELX_RH_Z_N", 11, MMCIF_HELIX},
+    {"HELX_LH_N", 9, MMCIF_HELIX},
+    {"HELX_LH_OT_N", 12, MMCIF_HELIX},
+    {"HELX_LH_A_N", 11, MMCIF_HELIX},
+    {"HELX_LH_B_N", 11, MMCIF_HELIX},
+    {"HELX_LH_Z_N", 11, MMCIF_HELIX},
+    // turns
+    {"TURN_P", 6, MMCIF_TURN},
+    {"TURN_OT_P", 9, MMCIF_TURN},
+    {"TURN_TY1_P", 10, MMCIF_TURN},
+    {"TURN_TY1P_P", 11, MMCIF_TURN},
+    {"TURN_TY2_P", 10, MMCIF_TURN},
+    {"TURN_TY2P_P", 11, MMCIF_TURN},
+    {"TURN_TY3_P", 10, MMCIF_TURN},
+    {"TURN_TY3P_P", 11, MMCIF_TURN},
+    // strands
+    {"STRN", 4, MMCIF_STRAND}
+  };
+  const size_t num_known = sizeof(known_types) / sizeof(known_types[0]);
+
+  // Linear scan; the identifiers are unique, so at most one row matches.
+  for (size_t i = 0; i < num_known; ++i) {
+    if (type == StringRef(known_types[i].id, known_types[i].len)) {
+      return known_types[i].element;
+    }
+  }
+
+  // Anything not listed above is reported as an error at the current line.
+  throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
+                                    "Unknown secondary structure class found: "+
+                                           type.str(),
+                                           this->GetCurrentLinenum()));
+}
+
 void MMCifParser::ParseStructConf(const std::vector<StringRef>& columns)
 {
   StringRef chain_name;
@@ -1038,7 +1109,7 @@ void MMCifParser::ParseStructConf(const std::vector<StringRef>& columns)
       chain_name = columns[indices_[BEG_AUTH_ASYM_ID]];
     } else { // unit test
       throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
-                                               "foo",
+"Chain name by author requested but 'struct_conf.beg_auth_asym_id' is not set.",
                                                this->GetCurrentLinenum()));
     }
   } else { // unit test
@@ -1048,7 +1119,14 @@ void MMCifParser::ParseStructConf(const std::vector<StringRef>& columns)
                       to_res_num(e_res_num, ' '),
                       chain_name.str(),
                       columns[indices_[CONF_TYPE_ID]].str()};
-  secstruct_list_.push_back(hse);
+
+  MMCifSecStructElement type =
+                        DetermineSecStructType(columns[indices_[CONF_TYPE_ID]]);
+  if (type == MMCIF_HELIX) { // unit test helix and strand reading
+    secstruct_list_.push_back(hse);
+  } else if (type == MMCIF_STRAND) {
+    secstruct_list_.push_back(hse);
+  }
 }
 
 void MMCifParser::OnDataRow(const StarLoopDesc& header,
diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh
index 5bb2aeffd3bf7dab04a3ebd7ac8485bbb0bd04d0..2863d4537feb79f080bc9f52f95963d4f5e22e7c 100644
--- a/modules/io/src/mol/mmcif_reader.hh
+++ b/modules/io/src/mol/mmcif_reader.hh
@@ -275,7 +275,26 @@ protected:
   /// \param columns data row
   void ParseStructConf(const std::vector<StringRef>& columns);
 
+  /// \brief Classes of secondary structure that DetermineSecStructType() can
+  ///        report for a secondary structure type identifier
+  typedef enum {
+    MMCIF_HELIX,
+    MMCIF_STRAND,
+    MMCIF_TURN
+  } MMCifSecStructElement;
+
+  /// \brief Map a secondary structure type identifier to its element class
+  ///
+  /// \param type Identifier to be classified, e.g. "HELX_P", "TURN_P" or "STRN"
+  ///
+  /// \return MMCIF_HELIX, MMCIF_STRAND or MMCIF_TURN
+  ///
+  /// \exception IOException if \p type is not a known identifier
+  MMCifSecStructElement DetermineSecStructType(const StringRef& type) const;
+
   /// \brief Transform data from struct_conf entry into secondary structure
+  ///
+  /// \param ent Entity to assign secondary structure to
   void AssignSecStructure(mol::EntityHandle ent);
 
 private:
diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc
index 99876427785ea3d741f081394d0036a4256df38d..997dd89ccbf2656abd210fedb56686906ded3327 100644
--- a/modules/io/tests/test_mmcif_reader.cc
+++ b/modules/io/tests/test_mmcif_reader.cc
@@ -66,6 +66,11
@@ public:
   using MMCifParser::ClearState;
   using MMCifParser::ConvertSEQRES;
   using MMCifParser::GetInfo;
+  using MMCifParser::DetermineSecStructType;
+  using MMCifParser::MMCifSecStructElement;
+  using MMCifParser::MMCIF_HELIX;
+  using MMCifParser::MMCIF_TURN;
+  using MMCifParser::MMCIF_STRAND;
 };
 
 void SetAtomSiteHeader(StarLoopDesc* mmcif_h)
@@ -798,6 +803,81 @@ BOOST_AUTO_TEST_CASE(mmcif_struct_tests)
   BOOST_MESSAGE(" done.");
 }
 
+BOOST_AUTO_TEST_CASE(mmcif_struct_conf_tests)
+{
+  BOOST_MESSAGE(" Running mmcif_struct_conf_tests...");
+  mol::EntityHandle eh = mol::CreateEntity();
+  TestMMCifParserProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh);
+
+  BOOST_MESSAGE(" testing type validation");
+  // Every identifier DetermineSecStructType() is expected to accept, with its
+  // explicit length and the element class it has to be mapped to.
+  static const struct {
+    const char* id;
+    size_t len;
+    TestMMCifParserProtected::MMCifSecStructElement expected;
+  } known_types[] = {
+    {"HELX_P", 6, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_OT_P", 9, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_RH_P", 9, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_RH_OT_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_RH_AL_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_RH_GA_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_RH_OM_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_RH_PI_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_RH_27_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_RH_3T_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_RH_PP_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_LH_P", 9, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_LH_OT_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_LH_AL_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_LH_GA_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_LH_OM_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_LH_PI_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_LH_27_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_LH_3T_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_LH_PP_P", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_N", 6, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_OT_N", 9, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_RH_N", 9, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_RH_OT_N", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_RH_A_N", 11, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_RH_B_N", 11, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_RH_Z_N", 11, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_LH_N", 9, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_LH_OT_N", 12, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_LH_A_N", 11, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_LH_B_N", 11, TestMMCifParserProtected::MMCIF_HELIX},
+    {"HELX_LH_Z_N", 11, TestMMCifParserProtected::MMCIF_HELIX},
+    {"TURN_P", 6, TestMMCifParserProtected::MMCIF_TURN},
+    {"TURN_OT_P", 9, TestMMCifParserProtected::MMCIF_TURN},
+    {"TURN_TY1_P", 10, TestMMCifParserProtected::MMCIF_TURN},
+    {"TURN_TY1P_P", 11, TestMMCifParserProtected::MMCIF_TURN},
+    {"TURN_TY2_P", 10, TestMMCifParserProtected::MMCIF_TURN},
+    {"TURN_TY2P_P", 11, TestMMCifParserProtected::MMCIF_TURN},
+    {"TURN_TY3_P", 10, TestMMCifParserProtected::MMCIF_TURN},
+    {"TURN_TY3P_P", 11, TestMMCifParserProtected::MMCIF_TURN},
+    {"STRN", 4, TestMMCifParserProtected::MMCIF_STRAND}
+  };
+  const size_t num_known = sizeof(known_types) / sizeof(known_types[0]);
+
+  for (size_t i = 0; i < num_known; ++i) {
+    StringRef candidate = StringRef(known_types[i].id, known_types[i].len);
+    // name the identifier on failure, since all rows share one source line
+    BOOST_CHECK_MESSAGE(tmmcif_p.DetermineSecStructType(candidate) ==
+                        known_types[i].expected,
+                        "wrong secondary structure class for " << known_types[i].id);
+  }
+
+  // an identifier that is not in the table has to be rejected
+  StringRef unknown = StringRef("Foo", 3);
+  BOOST_CHECK_THROW(tmmcif_p.DetermineSecStructType(unknown), IOException);
+
+  BOOST_MESSAGE(" done.");
+
+  BOOST_MESSAGE(" done.");
+}
+
 BOOST_AUTO_TEST_CASE(mmcif_parseatomident)
 {
   BOOST_MESSAGE(" Running mmcif_parseatomident tests...");
diff --git a/modules/io/tests/testfiles/mmcif/atom_site.mmcif b/modules/io/tests/testfiles/mmcif/atom_site.mmcif
index fdc059e508ab95ddb82f9c6d7290bf84cf6b0fbb..b8e2473936cc72486c5e06856bf2093456e6caba 100644
--- a/modules/io/tests/testfiles/mmcif/atom_site.mmcif
+++ b/modules/io/tests/testfiles/mmcif/atom_site.mmcif
@@ -106,10 +106,10 @@ _struct_conf.end_label_seq_id
 _struct_conf.details
 HELX1 HELX_RH_AL_P ARG A 87 GLN A 92 .
 HELX2 HELX_RH_AL_P ARG B 287 GLN B 292 .
-STRN1 STRN_P PRO A 1 LEU A 5 .
-STRN2 STRN_P CYS B 295 PHE B 299 .
-STRN3 STRN_P CYS A 95 PHE A 299 .
-STRN4 STRN_P PRO B 201 LEU B 205 .
+STRN1 STRN PRO A 1 LEU A 5 .
+STRN2 STRN CYS B 295 PHE B 299 .
+STRN3 STRN CYS A 95 PHE A 299 .
+STRN4 STRN PRO B 201 LEU B 205 .
 TURN1 TURN_TY1P_P ILE A 15 GLN A 18 .
 TURN2 TURN_TY2_P GLY A 49 GLY A 52 .
 TURN3 TURN_TY1P_P ILE A 55 HIS A 69 .