diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index b0467c62a2ce3a4a5c74d50e9bb84a148eab892e..9c4f0d5a5bd1d7ae612ed9ca086e8b474a6ec283 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -34,7 +34,8 @@ The following categories of a mmCIF file are considered by the parser: :class:`MMCifInfoStructDetails`. * ``struct_conf``: Stores secondary structure information (practically helices) in the :class:`entity <ost.mol.EntityHandle>` -* ``struct_sheet_range`` +* ``struct_sheet_range``: Stores secondary structure information for sheets in + the :class:`entity <ost.mol.EntityHandle>` Info Classes diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index e1222db86c50d02de5fe78b78ce70c36deee6249..68f971177e315727ce7edc79100d245cd2ed35f7 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -277,6 +277,21 @@ bool MMCifParser::OnBeginLoop(const StarLoopDesc& header) indices_[SC_BEG_AUTH_ASYM_ID] = header.GetIndex("beg_auth_asym_id"); indices_[SC_END_AUTH_ASYM_ID] = header.GetIndex("end_auth_asym_id"); cat_available = true; + } else if (header.GetCategory() == "struct_sheet_range") { + category_ = STRUCT_SHEET_RANGE; + // mandatory items + this->TryStoreIdx(SSR_BEG_LABEL_ASYM_ID, "beg_label_asym_id", header); + this->TryStoreIdx(SSR_BEG_LABEL_COMP_ID, "beg_label_comp_id", header); + this->TryStoreIdx(SSR_BEG_LABEL_SEQ_ID, "beg_label_seq_id", header); + this->TryStoreIdx(SSR_END_LABEL_ASYM_ID, "end_label_asym_id", header); + this->TryStoreIdx(SSR_END_LABEL_COMP_ID, "end_label_comp_id", header); + this->TryStoreIdx(SSR_END_LABEL_SEQ_ID, "end_label_seq_id", header); + this->TryStoreIdx(SSR_SHEET_ID, "sheet_id", header); + this->TryStoreIdx(SSR_ID, "id", header); + // optional items + indices_[SSR_BEG_AUTH_ASYM_ID] = header.GetIndex("beg_auth_asym_id"); + indices_[SSR_END_AUTH_ASYM_ID] = header.GetIndex("end_auth_asym_id"); + cat_available = true; } 
category_counts_[category_]++; return cat_available; @@ -1162,11 +1177,8 @@ void MMCifParser::ParseStructConf(const std::vector<StringRef>& columns) StringRef chain_name; int s_res_num; int e_res_num; - // fetch start and end - s_res_num = this->TryGetInt(columns[indices_[SC_BEG_LABEL_SEQ_ID]], - "struct_conf.beg_label_seq_id"); - e_res_num = this->TryGetInt(columns[indices_[SC_END_LABEL_SEQ_ID]], - "struct_conf.end_label_seq_id"); + + // fetch chain name, first if(auth_chain_id_) { if (indices_[SC_BEG_AUTH_ASYM_ID] != -1) { chain_name = columns[indices_[SC_BEG_AUTH_ASYM_ID]]; @@ -1181,6 +1193,11 @@ void MMCifParser::ParseStructConf(const std::vector<StringRef>& columns) if (restrict_chains_.size() == 0 || restrict_chains_.find(chain_name.str()) != String::npos) { + // fetch start and end + s_res_num = this->TryGetInt(columns[indices_[SC_BEG_LABEL_SEQ_ID]], + "struct_conf.beg_label_seq_id"); + e_res_num = this->TryGetInt(columns[indices_[SC_END_LABEL_SEQ_ID]], + "struct_conf.end_label_seq_id"); MMCifHSEntry hse = {to_res_num(s_res_num, ' '), to_res_num(e_res_num, ' '), chain_name.str()}; @@ -1195,6 +1212,41 @@ void MMCifParser::ParseStructConf(const std::vector<StringRef>& columns) } } +void MMCifParser::ParseStructSheetRange(const std::vector<StringRef>& columns) +{ + StringRef chain_name; + int s_res_num; + int e_res_num; + + if(auth_chain_id_) { + if (indices_[SSR_BEG_AUTH_ASYM_ID] != -1) { + chain_name = columns[indices_[SSR_BEG_AUTH_ASYM_ID]]; + } else { + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, +"Chain name by author requested but 'struct_sheet_range.beg_auth_asym_id' is not set.", + this->GetCurrentLinenum())); + } + } else { + chain_name = columns[indices_[SSR_BEG_LABEL_ASYM_ID]]; + } + + // restrict_chains feature not unit tested, since its about to be changed in + // the future + if (restrict_chains_.size() == 0 || + restrict_chains_.find(chain_name.str()) != String::npos) { + + s_res_num = 
this->TryGetInt(columns[indices_[SSR_BEG_LABEL_SEQ_ID]], + "struct_sheet_range.beg_label_seq_id"); + e_res_num = this->TryGetInt(columns[indices_[SSR_END_LABEL_SEQ_ID]], + "struct_sheet_range.end_label_seq_id"); + + MMCifHSEntry hse = {to_res_num(s_res_num, ' '), + to_res_num(e_res_num, ' '), + chain_name.str()}; + strand_list_.push_back(hse); + } +} + void MMCifParser::OnDataRow(const StarLoopDesc& header, const std::vector<StringRef>& columns) { @@ -1247,6 +1299,10 @@ void MMCifParser::OnDataRow(const StarLoopDesc& header, LOG_TRACE("processing struct_conf entry") this->ParseStructConf(columns); break; + case STRUCT_SHEET_RANGE: + LOG_TRACE("processing struct_sheet_range entry") + this->ParseStructSheetRange(columns); + break; default: throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, "Uncatched category '"+ header.GetCategory() +"' found.", diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index 6005831d807f4f73c46408e9471c4275788b7fde..3f6f974c26c6e6b68966597be5c1d1be7cc96de1 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -286,6 +286,11 @@ protected: /// \param columns data row void ParseStructConf(const std::vector<StringRef>& columns); + /// \brief Fetch MMCif struct_sheet_range (beta sheets) information + /// + /// \param columns data row + void ParseStructSheetRange(const std::vector<StringRef>& columns); + /// \struct types of secondary structure typedef enum { MMCIF_HELIX, @@ -441,6 +446,20 @@ private: SC_ID, ///< Unique identifier } StructConfItems; + /// \enum items of the struct_sheet_range category + typedef enum { + SSR_BEG_LABEL_ASYM_ID, ///< start, chain name (atom_site.label_asym_id) + SSR_BEG_LABEL_COMP_ID, ///< start, atom_site.label_comp_id + SSR_BEG_LABEL_SEQ_ID, ///< start, residue number (atom_site.label_seq_id) + SSR_END_LABEL_ASYM_ID, ///< end, chain name (atom_site.label_asym_id) + SSR_END_LABEL_COMP_ID, ///< end, atom_site.label_comp_id + 
SSR_END_LABEL_SEQ_ID, ///< end, residue number (atom_site.label_seq_id) + SSR_SHEET_ID, ///< link to struct_sheet.id + SSR_ID, ///< unique identifier + SSR_BEG_AUTH_ASYM_ID, ///< alternative start, (atom_site.auth_asym_id) + SSR_END_AUTH_ASYM_ID, ///< alternative end, (atom_site.auth_asym_id) + } StructSheetRangeItems; + /// \enum categories of the mmcif format typedef enum { ATOM_SITE, diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc index 621a5856d436ea3188ac386818d761b145745fce..2c1fcfee333719a3a922059ad186c60b42066014 100644 --- a/modules/io/tests/test_mmcif_reader.cc +++ b/modules/io/tests/test_mmcif_reader.cc @@ -61,6 +61,7 @@ public: using MMCifParser::ParsePdbxStructOperList; using MMCifParser::ParseStruct; using MMCifParser::ParseStructConf; + using MMCifParser::ParseStructSheetRange; using MMCifParser::TryStoreIdx; using MMCifParser::SetRestrictChains; using MMCifParser::SetReadSeqRes; @@ -940,40 +941,95 @@ BOOST_AUTO_TEST_CASE(mmcif_struct_conf_tests) BOOST_CHECK_THROW(tmmcif_p.DetermineSecStructType(type), IOException); BOOST_MESSAGE(" done."); + BOOST_MESSAGE(" testing auth_chain_id switch..."); + StarLoopDesc tmmcif_h; + { + std::vector<StringRef> columns; + tmmcif_h.SetCategory(StringRef("struct_conf", 11)); + tmmcif_h.Add(StringRef("beg_label_asym_id", 17)); + tmmcif_h.Add(StringRef("beg_label_comp_id", 17)); + tmmcif_h.Add(StringRef("beg_label_seq_id", 16)); + tmmcif_h.Add(StringRef("conf_type_id", 12)); + tmmcif_h.Add(StringRef("end_label_asym_id", 17)); + tmmcif_h.Add(StringRef("end_label_comp_id", 17)); + tmmcif_h.Add(StringRef("end_label_seq_id", 16)); + tmmcif_h.Add(StringRef("id", 2)); + tmmcif_h.Add(StringRef("beg_auth_asym_id", 16)); + tmmcif_p.OnBeginLoop(tmmcif_h); + columns.push_back(StringRef("A", 1)); + columns.push_back(StringRef("ARG", 3)); + columns.push_back(StringRef("1", 1)); + columns.push_back(StringRef("HELX_RH_AL_P", 12)); + columns.push_back(StringRef("A", 1)); + 
columns.push_back(StringRef("ARG", 3)); + columns.push_back(StringRef("2", 1)); + columns.push_back(StringRef("DHLX1", 5)); + columns.push_back(StringRef("A", 1)); + tmmcif_p.SetAuthChainID(true); + BOOST_CHECK_NO_THROW(tmmcif_p.ParseStructConf(columns)); + tmmcif_p.SetAuthChainID(false); + BOOST_CHECK_NO_THROW(tmmcif_p.ParseStructConf(columns)); + tmmcif_h.Clear(); + tmmcif_h.SetCategory(StringRef("struct_conf", 11)); + tmmcif_h.Add(StringRef("beg_label_asym_id", 17)); + tmmcif_h.Add(StringRef("beg_label_comp_id", 17)); + tmmcif_h.Add(StringRef("beg_label_seq_id", 16)); + tmmcif_h.Add(StringRef("conf_type_id", 12)); + tmmcif_h.Add(StringRef("end_label_asym_id", 17)); + tmmcif_h.Add(StringRef("end_label_comp_id", 17)); + tmmcif_h.Add(StringRef("end_label_seq_id", 16)); + tmmcif_h.Add(StringRef("id", 2)); + tmmcif_p.OnBeginLoop(tmmcif_h); + columns.pop_back(); + tmmcif_p.SetAuthChainID(true); + BOOST_CHECK_THROW(tmmcif_p.ParseStructConf(columns), IOException); + } + tmmcif_p.SetAuthChainID(false); + BOOST_MESSAGE(" done."); + + BOOST_MESSAGE(" done."); +} +BOOST_AUTO_TEST_CASE(mmcif_struct_sheet_range_tests) +{ + BOOST_MESSAGE(" Running mmcif_struct_sheet_range_tests..."); + mol::EntityHandle eh = mol::CreateEntity(); + TestMMCifParserProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh); + + BOOST_MESSAGE(" testing auth_chain_id switch..."); StarLoopDesc tmmcif_h; std::vector<StringRef> columns; - tmmcif_h.SetCategory(StringRef("struct_conf", 11)); + tmmcif_h.SetCategory(StringRef("struct_sheet_range", 18)); + tmmcif_h.Add(StringRef("sheet_id", 8)); tmmcif_h.Add(StringRef("beg_label_asym_id", 17)); tmmcif_h.Add(StringRef("beg_label_comp_id", 17)); tmmcif_h.Add(StringRef("beg_label_seq_id", 16)); - tmmcif_h.Add(StringRef("conf_type_id", 12)); tmmcif_h.Add(StringRef("end_label_asym_id", 17)); tmmcif_h.Add(StringRef("end_label_comp_id", 17)); tmmcif_h.Add(StringRef("end_label_seq_id", 16)); tmmcif_h.Add(StringRef("id", 2)); 
tmmcif_h.Add(StringRef("beg_auth_asym_id", 16)); tmmcif_p.OnBeginLoop(tmmcif_h); + columns.push_back(StringRef("Sheet1", 6)); columns.push_back(StringRef("A", 1)); columns.push_back(StringRef("ARG", 3)); columns.push_back(StringRef("1", 1)); - columns.push_back(StringRef("HELX_RH_AL_P", 12)); columns.push_back(StringRef("A", 1)); columns.push_back(StringRef("ARG", 3)); columns.push_back(StringRef("2", 1)); - columns.push_back(StringRef("DHLX1", 5)); + columns.push_back(StringRef("DSTRAND", 7)); columns.push_back(StringRef("A", 1)); tmmcif_p.SetAuthChainID(true); - BOOST_CHECK_NO_THROW(tmmcif_p.ParseStructConf(columns)); + BOOST_CHECK_NO_THROW(tmmcif_p.ParseStructSheetRange(columns)); tmmcif_p.SetAuthChainID(false); - BOOST_CHECK_NO_THROW(tmmcif_p.ParseStructConf(columns)); + BOOST_CHECK_NO_THROW(tmmcif_p.ParseStructSheetRange(columns)); tmmcif_h.Clear(); - tmmcif_h.SetCategory(StringRef("struct_conf", 11)); + tmmcif_h.SetCategory(StringRef("struct_sheet_range", 18)); + tmmcif_h.Add(StringRef("sheet_id", 8)); tmmcif_h.Add(StringRef("beg_label_asym_id", 17)); tmmcif_h.Add(StringRef("beg_label_comp_id", 17)); tmmcif_h.Add(StringRef("beg_label_seq_id", 16)); - tmmcif_h.Add(StringRef("conf_type_id", 12)); tmmcif_h.Add(StringRef("end_label_asym_id", 17)); tmmcif_h.Add(StringRef("end_label_comp_id", 17)); tmmcif_h.Add(StringRef("end_label_seq_id", 16)); @@ -981,11 +1037,12 @@ BOOST_AUTO_TEST_CASE(mmcif_struct_conf_tests) tmmcif_p.OnBeginLoop(tmmcif_h); columns.pop_back(); tmmcif_p.SetAuthChainID(true); - BOOST_CHECK_THROW(tmmcif_p.ParseStructConf(columns), IOException); + BOOST_CHECK_THROW(tmmcif_p.ParseStructSheetRange(columns), IOException); tmmcif_p.SetAuthChainID(false); - BOOST_MESSAGE(" done."); + + BOOST_MESSAGE(" done."); } @@ -1139,6 +1196,9 @@ BOOST_AUTO_TEST_CASE(mmcif_testreader) BOOST_CHECK_EQUAL(rl[0].GetSecStructure().IsHelical(), true); BOOST_CHECK_EQUAL(rl[1].GetSecStructure().IsHelical(), true); BOOST_CHECK_EQUAL(rl[2].GetSecStructure().IsExtended(), 
true); + ch = eh.FindChain("C"); + rl = ch.GetResidueList(); + BOOST_CHECK_EQUAL(rl[0].GetSecStructure().IsExtended(), true); BOOST_MESSAGE(" done."); BOOST_MESSAGE(" reading data fields which should not fail..."); diff --git a/modules/io/tests/testfiles/mmcif/atom_site.mmcif b/modules/io/tests/testfiles/mmcif/atom_site.mmcif index 3268d114eea08cf4bb0072cf9bcecea410391222..1cd5d1c357fe5a84db67d7fbafdae1231a3ae937 100644 --- a/modules/io/tests/testfiles/mmcif/atom_site.mmcif +++ b/modules/io/tests/testfiles/mmcif/atom_site.mmcif @@ -109,6 +109,17 @@ HELX1 HELX_RH_AL_P VAL A 11 THR A 12 . STRN1 STRN ILE A 13 ILE A 13 . HELX1 HELX_RH_AL_P ILE Z 1 ILE Z 1 . +loop_ +_struct_sheet_range.sheet_id +_struct_sheet_range.id +_struct_sheet_range.beg_label_comp_id +_struct_sheet_range.beg_label_asym_id +_struct_sheet_range.beg_label_seq_id +_struct_sheet_range.end_label_comp_id +_struct_sheet_range.end_label_asym_id +_struct_sheet_range.end_label_seq_id +sheet_1 strand_a APS C 1 APS C 1 + loop_ _atom_site.group_PDB _atom_site.type_symbol @@ -152,10 +163,10 @@ ATOM C CB ILE A 13 . 1 21.236 34.463 16.492 1.00 22.67 . 13 21 ? A ATOM C CG1 ILE A 13 . 1 20.478 33.469 17.371 1.00 22.14 . 13 22 ? A ATOM C CG2 ILE A 13 . 1 21.357 33.986 15.016 1.00 21.75 . 13 23 ? A # - - - - data truncated for brevity - - - - -HETATM C C1 APS C 14 1 1 4.171 29.012 7.116 0.58 17.27 1 300 101 ? A -HETATM C C2 APS C 14 1 1 4.949 27.758 6.793 0.58 16.95 1 300 102 ? A -HETATM O O3 APS C 14 1 1 4.800 26.678 7.393 0.58 16.85 1 300 103 ? A -HETATM N N4 APS C 14 1 1 5.930 27.841 5.869 0.58 16.43 1 300 104 ? A +HETATM C C1 APS C 1 1 1 4.171 29.012 7.116 0.58 17.27 1 300 101 ? A +HETATM C C2 APS C 1 1 1 4.949 27.758 6.793 0.58 16.95 1 300 102 ? A +HETATM O O3 APS C 1 1 1 4.800 26.678 7.393 0.58 16.85 1 300 103 ? A +HETATM N N4 APS C 1 1 1 5.930 27.841 5.869 0.58 16.43 1 300 104 ? A # - - - - data truncated for brevity - - - - # chain to be ignored by 'restrict_chains' feature ATOM N N ILE Z 1 . 
1 23.664 33.855 16.884 1.00 22.08 . 1 17 ? Z