diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 0a463ac24c068bcea1d35b1ff3f5ddde0845ef42..d3f43064f995988df12c3d4e0279de2b201e960f 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -59,6 +59,7 @@ void MMCifParser::Init() go_on_ = true; //memset(indices_, -1, MAX_ITEMS_IN_ROW * sizeof(int)); restrict_chains_ = ""; + subst_res_id_ = ""; //curr_chain_ = mol::ChainHandle(); //curr_residue_ = mol::ResidueHandle(); } @@ -132,8 +133,11 @@ bool MMCifParser::OnBeginLoop(const StarLoopDesc& header) indices_[OCCUPANCY] = header.GetIndex("occupancy"); indices_[B_ISO_OR_EQUIV] = header.GetIndex("B_iso_or_equiv"); indices_[GROUP_PDB] = header.GetIndex("group_PDB"); + indices_[AUTH_SEQ_ID] = header.GetIndex("auth_seq_id"); + indices_[PDBX_PDB_INS_CODE] = header.GetIndex("pdbx_PDB_ins_code"); return true; - } /*else if (header.GetCategory()=="entity_poly") { + } + /*else if (header.GetCategory()=="entity_poly") { } else if (header.GetCategory()=="pdbx_poly_seq_scheme") { } else if (header.GetCategory()=="pdbx_struct_assembly") { } else if (header.GetCategory()=="struct_conf") { @@ -150,6 +154,7 @@ bool MMCifParser::ParseAtomIdent(const std::vector<StringRef>& columns, String& chain_name, StringRef& res_name, mol::ResNum& resnum, + bool& valid_res_num, StringRef& atom_name, char& alt_loc) { @@ -172,21 +177,28 @@ bool MMCifParser::ParseAtomIdent(const std::vector<StringRef>& columns, } std::pair<bool, int> a_num = this->TryGetInt(columns[indices_[ID]], - "_atom_site.id", + "atom_site.id", profile_.fault_tolerant); // unit test alt_loc = columns[indices_[LABEL_ALT_ID]][0]; res_name = columns[indices_[LABEL_COMP_ID]]; - std::pair<bool, int> res_num =this->TryGetInt(columns[indices_[LABEL_SEQ_ID]], - "_atom_site.label_seq_id", - profile_.fault_tolerant); // unit test - if (!res_num.first) { // unit test - if (profile_.fault_tolerant) { - return false; + std::pair<bool, int> res_num; + if 
(columns[indices_[LABEL_SEQ_ID]][0] != '.') { + res_num =this->TryGetInt(columns[indices_[LABEL_SEQ_ID]], + "atom_site.label_seq_id", + profile_.fault_tolerant); // unit test + if (!res_num.first) { // unit test + if (profile_.fault_tolerant) { + return false; + } } + valid_res_num = true; + } else { + valid_res_num = false; + return true; } - resnum=to_res_num(res_num.second, ' '); + resnum = to_res_num(res_num.second, ' '); return true; } @@ -198,10 +210,12 @@ void MMCifParser::ParseAndAddAtom(const std::vector<StringRef>& columns) String chain_name; StringRef res_name, atom_name; mol::ResNum res_num(0); + bool valid_res_num = false; if (!this->ParseAtomIdent(columns, chain_name, res_name, res_num, + valid_res_num, atom_name, alt_loc)) { return; @@ -249,6 +263,24 @@ void MMCifParser::ParseAndAddAtom(const std::vector<StringRef>& columns) if(!curr_residue_) { // unit test update_residue=true; + } else if (!valid_res_num) { + if (indices_[AUTH_SEQ_ID] != -1 && + indices_[PDBX_PDB_INS_CODE] != -1) { + if (subst_res_id_ != + chain_name + + columns[indices_[AUTH_SEQ_ID]].str() + + columns[indices_[PDBX_PDB_INS_CODE]].str()) { + update_residue=true; + + subst_res_id_ = chain_name + + columns[indices_[AUTH_SEQ_ID]].str() + + columns[indices_[PDBX_PDB_INS_CODE]].str(); + } + } else { + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "Missing residue number information", + this->GetCurrentLinenum())); + } } else if(curr_residue_.GetNumber() != res_num) { // unit test update_residue=true; } @@ -265,12 +297,18 @@ void MMCifParser::ParseAndAddAtom(const std::vector<StringRef>& columns) if(update_residue) { // unit test curr_residue_=mol::ResidueHandle(); - if (profile_.join_spread_atom_records) { // unit test + if (valid_res_num && profile_.join_spread_atom_records) { // unit test curr_residue_=curr_chain_.FindResidue(res_num); } if (!curr_residue_.IsValid()) { // unit test LOG_DEBUG("new residue " << res_name << " " << res_num); - curr_residue_ 
=editor.AppendResidue(curr_chain_, res_name.str(), res_num); + if (valid_res_num) { + curr_residue_ = editor.AppendResidue(curr_chain_, + res_name.str(), + res_num); + } else { + curr_residue_ = editor.AppendResidue(curr_chain_, res_name.str()); + } warned_name_mismatch_=false; ++residue_count_; } diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index 6f933ae12aa5d57b894baa31ab44eb6acae49610..b81faae37c92b0ecb9937340d68c1607d8215e9f 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -133,6 +133,7 @@ public: String& chain_name, StringRef& res_name, mol::ResNum& resnum, + bool& valid_res_num, StringRef& atom_name, char& alt_loc); @@ -158,12 +159,14 @@ private: LABEL_COMP_ID, LABEL_ENTITY_ID, LABEL_SEQ_ID, ///< residue no. + AUTH_SEQ_ID, ///< residue no. by author TYPE_SYMBOL, ///< chemical element CARTN_X, ///< Coordinates ||IMPORTANT: This 3 entries have to stay CARTN_Y, ///< Coordinates ||together for the parser to work! 
CARTN_Z, ///< Coordinates || OCCUPANCY, B_ISO_OR_EQUIV, + PDBX_PDB_INS_CODE, GROUP_PDB ///< record name } AtomSiteItems; @@ -187,6 +190,7 @@ private: int atom_count_; bool warned_name_mismatch_; bool go_on_; ///< flow control within the parser hooks + String subst_res_id_; ///< workaround for missing label_seq_id values //from pdbdreader //entity als member, fill in ondatarow //import function diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc index 09541ca29d7e49cdf2a4b04fa71b3e769eaabddf..d14008fcb5a38f9f9cbc576c71f08a561aed5ee8 100644 --- a/modules/io/tests/test_mmcif_reader.cc +++ b/modules/io/tests/test_mmcif_reader.cc @@ -42,7 +42,6 @@ public: using MMCifParser::IsValidPDBIdent; using MMCifParser::ParseAtomIdent; using MMCifParser::ParseAndAddAtom; - using MMCifParser::EnsureEnoughColumns; using MMCifParser::TryStoreIdx; }; @@ -134,26 +133,6 @@ BOOST_AUTO_TEST_CASE(mmcif_atom_site_header) BOOST_MESSAGE(" done."); } -BOOST_AUTO_TEST_CASE(mmcif_ensureenoughcolumns) -{ - mol::EntityHandle eh=mol::CreateEntity(); - - BOOST_MESSAGE(" Running mmcif_ensureenoughcolumns tests..."); - std::ifstream s("testfiles/mmcif/atom_site.mmcif"); - IOProfile profile; - TestMMCifParserProtected tmmcif_p(s, eh, profile); - std::vector<StringRef> cols; - BOOST_MESSAGE(" testing short atom_site entry"); - cols.push_back(StringRef("ATOM", 4)); - BOOST_CHECK_THROW(tmmcif_p.EnsureEnoughColumns(cols, 2), IOException); - BOOST_MESSAGE(" testing correct number"); - BOOST_CHECK(tmmcif_p.EnsureEnoughColumns(cols, 1)); - BOOST_MESSAGE(" testing fault tolerant profile"); - profile.fault_tolerant = true; - BOOST_CHECK(!tmmcif_p.EnsureEnoughColumns(cols, 2)); - BOOST_MESSAGE(" done."); -} - BOOST_AUTO_TEST_CASE(mmcif_parseatomident) { mol::EntityHandle eh = mol::CreateEntity(); @@ -167,20 +146,20 @@ BOOST_AUTO_TEST_CASE(mmcif_parseatomident) StringRef res_name; mol::ResNum resnum(0); StringRef atom_name; - char alt_loc; + //char alt_loc; - BOOST_MESSAGE(" 
testing short atom_site entry"); + //BOOST_MESSAGE(" testing short atom_site entry"); // negative - cols.push_back(StringRef("ATOM", 4)); - BOOST_CHECK_THROW(tmmcif_p.ParseAtomIdent(cols, - chain_name, - res_name, - resnum, - atom_name, - alt_loc), IOException); + //cols.push_back(StringRef("ATOM", 4)); + //BOOST_CHECK_THROW(tmmcif_p.ParseAtomIdent(cols, + // chain_name, + // res_name, + // resnum, + // atom_name, + // alt_loc), IOException); // positive - StarLoopDesc tmmcif_h; - tmmcif_h.SetCategory(StringRef("atom_site", 9)); + //StarLoopDesc tmmcif_h; + //tmmcif_h.SetCategory(StringRef("atom_site", 9)); // build header //mmcif_h.Add(StringRef("AUTH_ASYM_ID", 12)); /* @@ -210,10 +189,10 @@ BOOST_AUTO_TEST_CASE(mmcif_parseandaddatom) TestMMCifParserProtected tmmcif_p(s, eh, IOProfile()); std::vector<StringRef> cols; - BOOST_MESSAGE(" testing short atom_site entry"); - cols.push_back(StringRef("ATOM", 4)); - BOOST_CHECK_THROW(tmmcif_p.ParseAndAddAtom(cols), IOException); - BOOST_MESSAGE(" done."); + //BOOST_MESSAGE(" testing short atom_site entry"); + //cols.push_back(StringRef("ATOM", 4)); + //BOOST_CHECK_THROW(tmmcif_p.ParseAndAddAtom(cols), IOException); + //BOOST_MESSAGE(" done."); } BOOST_AUTO_TEST_CASE(mmcif_testreader)