diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc
index 756669d51900524a126e654685df79963f054cc7..0c15c3767167c9245acd645b4b7ceff2d1991640 100644
--- a/modules/io/src/mol/mmcif_reader.cc
+++ b/modules/io/src/mol/mmcif_reader.cc
@@ -80,6 +80,7 @@ void MMCifParser::ClearState()
   authors_map_.clear();
   bu_origin_map_.clear();
   bu_assemblies_.clear();
+  secstruct_list_.clear();
 }
 
 void MMCifParser::SetRestrictChains(const String& restrict_chains)
@@ -260,6 +261,20 @@ bool MMCifParser::OnBeginLoop(const StarLoopDesc& header)
       = header.GetIndex("pdbx_model_type_details");
     indices_[STRUCT_TITLE] = header.GetIndex("title");
     cat_available = true;
+  } else if (header.GetCategory() == "struct_conf") {
+    category_ = STRUCT_CONF;
+    // mandatory items
+    this->TryStoreIdx(BEG_LABEL_ASYM_ID, "beg_label_asym_id", header);
+    this->TryStoreIdx(BEG_LABEL_COMP_ID, "beg_label_comp_id", header);
+    this->TryStoreIdx(BEG_LABEL_SEQ_ID, "beg_label_seq_id", header);
+    this->TryStoreIdx(CONF_TYPE_ID, "conf_type_id", header);
+    this->TryStoreIdx(END_LABEL_ASYM_ID, "end_label_asym_id", header);
+    this->TryStoreIdx(END_LABEL_COMP_ID, "end_label_comp_id", header);
+    this->TryStoreIdx(END_LABEL_SEQ_ID, "end_label_seq_id", header);
+    this->TryStoreIdx(STRUCT_CONF_ID, "id", header);
+    // optional items
+    indices_[BEG_AUTH_ASYM_ID] = header.GetIndex("beg_auth_asym_id");
+    cat_available = true;
   }
   category_counts_[category_]++;
   return cat_available;
@@ -1008,6 +1023,34 @@ void MMCifParser::ParseStruct(const std::vector<StringRef>& columns)
   info_.SetStructDetails(details);
 }
 
+void MMCifParser::ParseStructConf(const std::vector<StringRef>& columns)
+{
+  StringRef chain_name;
+  int s_res_num;
+  int e_res_num;
+  // fetch first and last residue number of the struct_conf element
+  s_res_num = this->TryGetInt(columns[indices_[BEG_LABEL_SEQ_ID]],
+                              "struct_conf.beg_label_seq_id"); // unit test
+  e_res_num = this->TryGetInt(columns[indices_[END_LABEL_SEQ_ID]],
+                              "struct_conf.end_label_seq_id"); // unit test
+  if(auth_chain_id_) { // unit test both ways
+    if (indices_[BEG_AUTH_ASYM_ID] != -1) { // unit test
+      chain_name = columns[indices_[BEG_AUTH_ASYM_ID]];
+    } else { // unit test
+      throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
+"Chain name by author requested but 'struct_conf.beg_auth_asym_id' is not set.",
+                                               this->GetCurrentLinenum()));
+    }
+  } else { // unit test
+    chain_name = columns[indices_[BEG_LABEL_ASYM_ID]];
+  }
+  MMCifHSEntry hse = {to_res_num(s_res_num, ' '),
+                      to_res_num(e_res_num, ' '),
+                      chain_name.str(),
+                      columns[indices_[CONF_TYPE_ID]].str()};
+  secstruct_list_.push_back(hse);
+}
+
 void MMCifParser::OnDataRow(const StarLoopDesc& header,
                             const std::vector<StringRef>& columns)
 {
@@ -1056,6 +1099,10 @@ void MMCifParser::OnDataRow(const StarLoopDesc& header,
     LOG_TRACE("processing struct entry")
     this->ParseStruct(columns);
     break;
+  case STRUCT_CONF:
+    LOG_TRACE("processing struct_conf entry")
+    this->ParseStructConf(columns);
+    break;
   default:
     throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
                        "Uncatched category '"+ header.GetCategory() +"' found.",
@@ -1064,6 +1111,18 @@ void MMCifParser::OnDataRow(const StarLoopDesc& header,
   }
 }
 
+void MMCifParser::AssignSecStructure(mol::EntityHandle ent)
+{
+  // TODO: not implemented yet, planned steps: for each helix, take chain
+  // check for overlaps (visual artifacts)
+  // assign helix to chain
+
+  // for all strands
+  // take chain
+  // check for overlaps
+  // assign strand to chain
+}
+
 void MMCifParser::OnEndData()
 {
   mol::XCSEditor editor=ent_handle_.EditXCS(mol::BUFFERED_EDIT);
@@ -1155,6 +1214,8 @@ void MMCifParser::OnEndData()
   }
   bu_assemblies_.clear();
 
+  // TODO: create secondary structure from struct_conf info (not done yet)
+
   LOG_INFO("imported "
            << chain_count_ << " chains, "
            << residue_count_ << " residues, "
diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh
index ee63212a7cfc184fcf1fb8000477a391aae6e14f..5bb2aeffd3bf7dab04a3ebd7ac8485bbb0bd04d0 100644
--- a/modules/io/src/mol/mmcif_reader.hh
+++ b/modules/io/src/mol/mmcif_reader.hh
@@ -270,6 +270,14 @@ protected:
   /// \param columns data row
   void ParseStruct(const std::vector<StringRef>& columns);
 
+  /// \brief Fetch MMCif struct_conf (secondary structure) information
+  ///
+  /// \param columns data row
+  void ParseStructConf(const std::vector<StringRef>& columns);
+
+  /// \brief Transform data from struct_conf entry into secondary structure
+  void AssignSecStructure(mol::EntityHandle ent);
+
 private:
   /// \enum magic numbers of this class
   typedef enum {
@@ -394,6 +402,19 @@ private:
     STRUCT_TITLE                ///< title for the data block
   } StructItems;
 
+  /// \enum items of the struct_conf category
+  typedef enum {
+    BEG_AUTH_ASYM_ID,  ///< Starting residue, points to atom_site.auth_asym_id
+    BEG_LABEL_ASYM_ID, ///< Starting residue, points to atom_site.label_asym_id
+    BEG_LABEL_COMP_ID, ///< Starting residue, points to atom_site.label_comp_id
+    BEG_LABEL_SEQ_ID,  ///< Starting residue, points to atom_site.label_seq_id
+    CONF_TYPE_ID,      ///< Pointer to struct_conf_type.id
+    END_LABEL_ASYM_ID, ///< Ending residue, points to atom_site.label_asym_id
+    END_LABEL_COMP_ID, ///< Ending residue, points to atom_site.label_comp_id
+    END_LABEL_SEQ_ID,  ///< Ending residue, points to atom_site.label_seq_id
+    STRUCT_CONF_ID     ///< Unique identifier
+  } StructConfItems;
+
   /// \enum categories of the mmcif format
   typedef enum {
     ATOM_SITE,
@@ -407,6 +428,7 @@ private:
     PDBX_STRUCT_ASSEMBLY_GEN,
     PDBX_STRUCT_OPER_LIST,
     STRUCT,
+    STRUCT_CONF,
     DONT_KNOW
   } MMCifCategory;
 
@@ -429,6 +451,15 @@ private:
   typedef std::map<String, std::pair<std::vector<int>, std::vector<String> > >
   MMCifCitationAuthorMap;
 
+  /// \struct store struct_conf info (secondary structure)
+  typedef struct {
+    mol::ResNum start;
+    mol::ResNum end;
+    String chain_name;
+    String type;
+  } MMCifHSEntry;
+  typedef std::vector<MMCifHSEntry> MMCifHSVector;
+
   // members
   MMCifCategory category_;
   int category_counts_[DONT_KNOW+1]; ///< overall no. of atom_site loops
@@ -457,6 +488,7 @@ private:
   MMCifCitationAuthorMap authors_map_;
   MMCifBioUAssemblyVector bu_assemblies_;
   std::map<String, String> bu_origin_map_; ///< pdbx_struct_assembly.details
+  MMCifHSVector secstruct_list_; ///< for storing struct_conf sec.struct. data
 };
 
 }}
diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc
index bff617cca9871872389e3495c76fa82040857303..99876427785ea3d741f081394d0036a4256df38d 100644
--- a/modules/io/tests/test_mmcif_reader.cc
+++ b/modules/io/tests/test_mmcif_reader.cc
@@ -724,8 +724,6 @@ BOOST_AUTO_TEST_CASE(mmcif_biounit_tests)
   tmmcif_h.Add(StringRef("matrix[3][2]", 12));
   tmmcif_h.Add(StringRef("matrix[3][3]", 12));
 
-
-
   tmmcif_p.OnBeginLoop(tmmcif_h);
 
   columns.pop_back();
@@ -940,6 +938,8 @@ BOOST_AUTO_TEST_CASE(mmcif_testreader)
     BOOST_CHECK_EQUAL(rs->GetNumber().GetNum(), i);
   }
 
+  // TODO: add checks for the imported struct_conf info here
+
   BOOST_MESSAGE(" done.");
 
   BOOST_MESSAGE(" reading data fields which should not fail...");
diff --git a/modules/io/tests/testfiles/mmcif/atom_site.mmcif b/modules/io/tests/testfiles/mmcif/atom_site.mmcif
index cf3b4c75192b01316625877d21852c2b7dc5e968..fdc059e508ab95ddb82f9c6d7290bf84cf6b0fbb 100644
--- a/modules/io/tests/testfiles/mmcif/atom_site.mmcif
+++ b/modules/io/tests/testfiles/mmcif/atom_site.mmcif
@@ -94,6 +94,27 @@ _struct.pdbx_formula_weight_method 'Good Guess'
 _struct.pdbx_model_details 'Even better guessing'
 _struct.pdbx_model_type_details 'Guess'
+loop_
+_struct_conf.id
+_struct_conf.conf_type_id
+_struct_conf.beg_label_comp_id
+_struct_conf.beg_label_asym_id
+_struct_conf.beg_label_seq_id
+_struct_conf.end_label_comp_id
+_struct_conf.end_label_asym_id
+_struct_conf.end_label_seq_id
+_struct_conf.details
+HELX1 HELX_RH_AL_P ARG A 87 GLN A 92 .
+HELX2 HELX_RH_AL_P ARG B 287 GLN B 292 .
+STRN1 STRN_P PRO A 1 LEU A 5 .
+STRN2 STRN_P CYS B 295 PHE B 299 .
+STRN3 STRN_P CYS A 95 PHE A 299 .
+STRN4 STRN_P PRO B 201 LEU B 205 .
+TURN1 TURN_TY1P_P ILE A 15 GLN A 18 .
+TURN2 TURN_TY2_P GLY A 49 GLY A 52 .
+TURN3 TURN_TY1P_P ILE A 55 HIS A 69 .
+TURN4 TURN_TY1_P THR A 91 GLY A 94 .
+
 loop_
 _atom_site.group_PDB
 _atom_site.type_symbol