From 68899ae1d7285159c1c1e3025558fb9c8abf5803 Mon Sep 17 00:00:00 2001 From: Xavier Robin <xavalias-github@xavier.robin.name> Date: Thu, 16 May 2024 15:46:53 +0200 Subject: [PATCH] fix: SCHWED-6274 read SEQRES from entity_poly_seq This commit additionally drops more side-effects of the presence of a seqres record by moving all the logic to the GetSeqRes function, rather than being generated on the fly during parsing. --- CHANGELOG.txt | 3 + modules/io/doc/mmcif.rst | 26 +-- modules/io/pymod/__init__.py | 3 +- modules/io/pymod/export_mmcif_io.cc | 6 +- modules/io/src/mol/mmcif_info.hh | 3 +- modules/io/src/mol/mmcif_reader.cc | 187 ++++++++---------- modules/io/src/mol/mmcif_reader.hh | 43 +--- modules/io/src/mol/mmcif_str.cc | 1 - modules/io/tests/test_mmcif_reader.cc | 64 ------ .../io/tests/testfiles/mmcif/atom_site.mmcif | 9 + .../alg/tests/testfiles/align_to_seqres.mmcif | 82 +++++++- .../validate_segres_aln_breakage.mmcif | 81 +++++++- .../validate_seqres_aln_connected.mmcif | 83 +++++++- 13 files changed, 364 insertions(+), 227 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index f3f863c14..0864bb6e6 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -8,6 +8,9 @@ Changes in Release x.x.x * Added 'full_bs_search' argument in ligand scoring to optionally speed up computations in large complexes. the number of model binding sites + * SEQRES information is now read from the entity_poly_seq category in mmCIF. + The canonical as well as semi-canonical (with 3 letter-codes in bracket) + SEQRES are now available from the MMCifInfo object. * Several bug fixes and improvements. Changes in Release 2.7.0 diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index 91d6cda1f..43618968f 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -65,8 +65,7 @@ Notes: It is added as string property named "pdb_auth_chain_name" to the :class:`~ost.mol.ChainHandle`. 
The mapping is also stored in :class:`MMCifInfo` as :meth:`~MMCifInfo.GetMMCifPDBChainTr` and - :meth:`~MMCifInfo.GetPDBMMCifChainTr` if a non-empty SEQRES record exists for - that chain (this should exclude ligands and water). + :meth:`~MMCifInfo.GetPDBMMCifChainTr` (the latter only for polymer chains). * Molecular entities in mmCIF are identified by an ``entity.id``, which is extracted from ``atom_site.label_entity_id`` for the first atom of the chain. It is added as string property named "entity_id" to the @@ -1417,16 +1416,21 @@ of the annotation available. :class:`str` - .. attribute:: seqres + .. attribute:: seqres_canonical - SEQRES with gentle preprocessing - empty string if entity is not of type - "polymer". By default, the :class:`ost.io.MMCifReader` reads the value of the - ``_entity_poly.pdbx_seq_one_letter_code`` token. Copies all letters but - searches a :class:`ost.conop.CompoundLib` for compound names in brackets. - *seqres* gets an 'X' if no compound is found or the respective compound has - one letter code '?'. Uses the one letter code of the found compound - otherwise. So it's basically a canonical SEQRES with exactly one character - per residue. + Canonical SEQRES - empty string if entity is not of type "polymer". + This contains the canonical sequence extracted from the + ``_entity_poly.pdbx_seq_one_letter_code_can`` data item. + + :type: :class:`str` + + .. attribute:: seqres_pdbx + + PDBx SEQRES - empty string if entity is not of type "polymer". + This contains the sequence extracted from the + ``_entity_poly.pdbx_seq_one_letter_code`` data item. + Modifications and non-standard amino acids are represented by + their three letter code in brackets, e.g. 
"(MSE)" :type: :class:`str` diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index 89c1c3c8e..66a518def 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -430,8 +430,7 @@ def LoadMMCIF(filename, fault_tolerant=None, calpha_only=None, try: ent = mol.CreateEntity() reader = MMCifReader(filename, ent, prof) - reader.read_seqres = seqres - + # NOTE: to speed up things, we could introduce a restrict_chains parameter # similar to the one in LoadPDB. Here, it would have to be a list/set # of chain-name-strings. diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc index fa09b5bfa..aa6595bc2 100644 --- a/modules/io/pymod/export_mmcif_io.cc +++ b/modules/io/pymod/export_mmcif_io.cc @@ -107,7 +107,6 @@ void export_mmcif_io() class_<MMCifReader, boost::noncopyable>("MMCifReader", init<const String&, EntityHandle&, const IOProfile&>()) .def("Parse", &MMCifReader::Parse) .def("SetRestrictChains", &MMCifReader::SetRestrictChains) - .def("SetReadCanonicalSeqRes", &MMCifReader::SetReadCanonicalSeqRes) .def("GetSeqRes", &MMCifReader::GetSeqRes) .def("GetInfo", make_function(&MMCifReader::GetInfo, return_value_policy<copy_const_reference>())) @@ -116,8 +115,6 @@ void export_mmcif_io() return_value_policy<copy_const_reference>()), &MMCifReader::SetRestrictChains) .add_property("seqres", &MMCifReader::GetSeqRes) - .add_property("read_seqres", &MMCifReader::GetReadSeqRes, - &MMCifReader::SetReadSeqRes) .add_property("info", make_function(&MMCifReader::GetInfo, return_value_policy<copy_const_reference>())) ; @@ -493,7 +490,8 @@ void export_mmcif_io() .add_property("entity_poly_type", &MMCifEntityDesc::entity_poly_type) .add_property("branched_type", &MMCifEntityDesc::branched_type) .add_property("details", &MMCifEntityDesc::details) - .add_property("seqres", &MMCifEntityDesc::seqres) + .add_property("seqres_canonical", &MMCifEntityDesc::seqres_canonical) + .add_property("seqres_pdbx", 
&MMCifEntityDesc::seqres_pdbx) .add_property("mon_ids", &MMCifEntityDesc::mon_ids) .add_property("hetero_num", &MMCifEntityDesc::hetero_num) .add_property("hetero_ids", &MMCifEntityDesc::hetero_ids) diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh index c658af2b5..2892ae7c6 100644 --- a/modules/io/src/mol/mmcif_info.hh +++ b/modules/io/src/mol/mmcif_info.hh @@ -957,7 +957,8 @@ typedef struct { String entity_poly_type; ///< value of _entity_poly.type String branched_type; ///< value of _pdbx_entity_branch.type String details; ///< description of this entity - String seqres; ///< chain of monomers + String seqres_canonical; ///< _entity_poly.pdbx_seq_one_letter_code_can + String seqres_pdbx; ///< _entity_poly.pdbx_seq_one_letter_code std::vector<String> mon_ids; ///< list of monomer names from _entity_poly_seq std::vector<int> hetero_num; ///< res num of heterogeneous compounds std::vector<String> hetero_ids;///< names of heterogeneous compounds diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 6ad799100..2a3fed856 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -59,15 +59,11 @@ void MMCifReader::Init() atom_count_ = 0; residue_count_ = 0; auth_chain_id_ = false; - seqres_can_ = false; has_model_ = false; restrict_chains_ = ""; subst_res_id_ = ""; curr_chain_ = mol::ChainHandle(); curr_residue_ = mol::ResidueHandle(); - seqres_ = seq::CreateSequenceList(); - read_seqres_ = true; - warned_rule_based_ = false; info_ = MMCifInfo(); } @@ -80,7 +76,6 @@ void MMCifReader::ClearState() atom_count_ = 0; category_ = DONT_KNOW; warned_name_mismatch_ = false; - seqres_ = seq::CreateSequenceList(); info_ = MMCifInfo(); entity_desc_map_.clear(); authors_map_.clear(); @@ -737,7 +732,8 @@ MMCifEntityDescMap::iterator MMCifReader::GetEntityDescMapIterator( .entity_poly_type = "", .branched_type = "", .details="", - .seqres="", + .seqres_canonical="", + 
.seqres_pdbx="", .mon_ids=std::vector<String>(), .hetero_num=std::vector<int>(), .hetero_ids=std::vector<String>()}; @@ -785,93 +781,17 @@ void MMCifReader::ParseEntityPoly(const std::vector<StringRef>& columns) edm_it->second.entity_poly_type = columns[indices_[EP_TYPE]].str(); } - // store seqres - if (edm_it->second.seqres.length() > 0) { - throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, - "entity_poly.pdbx_seq_one_letter_code[_can] clash: sequence for entry '" + - columns[indices_[ENTITY_ID]].str() + - "' is already set to '" + - edm_it->second.seqres + "'.", - this->GetCurrentLinenum())); + // store canonical seqres + if (indices_[PDBX_SEQ_ONE_LETTER_CODE_CAN] != -1) { + StringRef seqres_can=columns[indices_[PDBX_SEQ_ONE_LETTER_CODE_CAN]]; + edm_it->second.seqres_canonical = seqres_can.str_no_whitespace(); } - if (read_seqres_) { - StringRef seqres; - if (seqres_can_) { - if (indices_[PDBX_SEQ_ONE_LETTER_CODE_CAN] != -1) { - seqres=columns[indices_[PDBX_SEQ_ONE_LETTER_CODE_CAN]]; - edm_it->second.seqres = seqres.str_no_whitespace(); - } else { - throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, - "'entity_poly.pdbx_seq_one_letter_code_can' not available.'", - this->GetCurrentLinenum())); - } - } else if (indices_[PDBX_SEQ_ONE_LETTER_CODE] != -1) { - seqres=columns[indices_[PDBX_SEQ_ONE_LETTER_CODE]]; - - conop::CompoundLibBasePtr comp_lib=conop::Conopology::Instance() - .GetDefaultLib(); - if (!comp_lib) { - if (!warned_rule_based_) { - LOG_WARNING("SEQRES import requires a valid compound library to " - "handle non standard compounds. 
Their One letter " - "codes will be set to X."); - } - warned_rule_based_=true; - comp_lib = conop::CompoundLibBasePtr(new ost::conop::MinimalCompoundLib); - } - edm_it->second.seqres = this->ConvertSEQRES(seqres.str_no_whitespace(), - comp_lib); - } else { - throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, - "'entity_poly.pdbx_seq_one_letter_code' not available.'", - this->GetCurrentLinenum())); - } - } -} - -String MMCifReader::ConvertSEQRES(const String& seqres, - conop::CompoundLibBasePtr comp_lib) -{ - String can_seqres; - for (String::const_iterator i=seqres.begin(), e=seqres.end(); i!=e; ++i) { - if (*i=='(') { - bool found_end_paren=false; - String tlc; - tlc.reserve(3); - while ((++i)!=seqres.end()) { - if (*i==')') { - found_end_paren=true; - break; - } - tlc.push_back(*i); - } - if (!found_end_paren) { - throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, - "'entity_poly.pdbx_seq_one_letter_code' contains " - "unmatched '('", this->GetCurrentLinenum())); - } - conop::CompoundPtr compound=comp_lib->FindCompound(tlc, - conop::Compound::PDB); - if (!compound) { - if (tlc!="UNK") { - LOG_WARNING("unknown residue '" << tlc << "' in SEQRES record. 
" - "Setting one-letter-code to 'X'"); - } - can_seqres.push_back('X'); - continue; - } - if (compound->GetOneLetterCode()=='?') { - can_seqres.push_back('X'); - } else { - can_seqres.push_back(compound->GetOneLetterCode()); - } - - } else { - can_seqres.push_back(*i); + // store non canonical seqres + if (indices_[PDBX_SEQ_ONE_LETTER_CODE] != -1) { + StringRef seqres_pdbx=columns[indices_[PDBX_SEQ_ONE_LETTER_CODE]]; + edm_it->second.seqres_pdbx = seqres_pdbx.str_no_whitespace(); } - } - return can_seqres; } void MMCifReader::ParseCitation(const std::vector<StringRef>& columns) @@ -1943,11 +1863,13 @@ void MMCifReader::OnEndData() if (edm_it != entity_desc_map_.end()) { editor.SetChainType(css->first, edm_it->second.type); editor.SetChainDescription(css->first, edm_it->second.details); - if (edm_it->second.seqres.length() > 0) { - seqres_.AddSequence(seq::CreateSequence(css->first.GetName(), - edm_it->second.seqres)); - pdb_auth_chain_name = css->first.GetStringProp("pdb_auth_chain_name"); - info_.AddMMCifPDBChainTr(css->first.GetName(), pdb_auth_chain_name); + // Add chain mapping for all chains + pdb_auth_chain_name = css->first.GetStringProp("pdb_auth_chain_name"); + info_.AddMMCifPDBChainTr(css->first.GetName(), pdb_auth_chain_name); + + if (edm_it->second.entity_type=="polymer") { + // PDB -> mmCIF chain mapping only for polymers + // This is not a 1:1 mapping because of ligands info_.AddPDBMMCifChainTr(pdb_auth_chain_name, css->first.GetName()); } else if (edm_it->second.entity_type=="non-polymer") { mol::ChainHandle chain=css->first; @@ -1959,7 +1881,8 @@ void MMCifReader::OnEndData() } } else { LOG_WARNING("No entity description found for atom_site.label_entity_id '" - << css->second << "'"); + << css->second << "'. 
SEQRES, chain mapping and ligand " + << " annotation will be missing."); } // find blm_it = entity_branch_link_map_.find(css->second); @@ -2075,19 +1998,19 @@ void MMCifReader::OnEndData() } // conclude EntityDesc (add entity_poly_seq if present) and add to MMCifInfo - for(auto entity_it: entity_desc_map_) { + for(auto &entity_it: entity_desc_map_) { if(entity_poly_seq_map_.find(entity_it.first) != entity_poly_seq_map_.end()) { int max_num = 1; - for(auto seqres_it: entity_poly_seq_map_[entity_it.first]) { + for(auto &seqres_it: entity_poly_seq_map_[entity_it.first]) { max_num = std::max(max_num, seqres_it.first); } entity_it.second.mon_ids.assign(max_num, "?"); - for(auto seqres_it: entity_poly_seq_map_[entity_it.first]) { + for(auto &seqres_it: entity_poly_seq_map_[entity_it.first]) { entity_it.second.mon_ids[seqres_it.first-1] = seqres_it.second; } } if(entity_poly_seq_h_map_.find(entity_it.first) != entity_poly_seq_h_map_.end()) { - for(auto hetero_it: entity_poly_seq_h_map_[entity_it.first]) { + for(auto &hetero_it: entity_poly_seq_h_map_[entity_it.first]) { entity_it.second.hetero_num.push_back(hetero_it.first); entity_it.second.hetero_ids.push_back(hetero_it.second); } @@ -2163,4 +2086,68 @@ String OSTBondOrderToMMCifValueOrder(const unsigned char bond_order) return String(""); } +seq::SequenceList MMCifReader::GetSeqRes() const { + std::map<String, String> entity_seqres_map; // Map entity_id -> seqres + seq::SequenceList seqres_list = seq::CreateSequenceList(); + + // We need a compound lib for the conversion + conop::CompoundLibBasePtr comp_lib=conop::Conopology::Instance() + .GetDefaultLib(); + if (!comp_lib) { + LOG_WARNING("SEQRES requires a valid compound library to " + "handle non standard compounds. 
Their One letter " + "codes will be set to X."); + comp_lib = conop::CompoundLibBasePtr(new ost::conop::MinimalCompoundLib); + } + + // Generate the SEQRES for every entity + for(auto const &entity_it: entity_desc_map_) { + if (entity_it.second.entity_type == "polymer") { + entity_seqres_map[entity_it.first] = ""; + auto mon_ids = entity_it.second.mon_ids; +// if (mon_ids.size() == 0) { +// // We hit this if there was an _entity_poly category but no _entity_poly_seq +// LOG_WARNING("No SEQRES found for entity '" +// << entity_it.first << "'."); +// } + entity_seqres_map[entity_it.first].reserve(mon_ids.size()); + for (auto const &mon_id: mon_ids) { + conop::CompoundPtr compound=comp_lib->FindCompound(mon_id, conop::Compound::PDB); + if (!compound) { + if (mon_id != "UNK") { + LOG_WARNING("unknown residue '" << mon_id << "' in SEQRES record. " + "Setting one-letter-code to 'X'"); + } + entity_seqres_map[entity_it.first].push_back('X'); + continue; + } + if (compound->GetOneLetterCode()=='?') { + entity_seqres_map[entity_it.first].push_back('X'); + } else { + entity_seqres_map[entity_it.first].push_back(compound->GetOneLetterCode()); + } + } + } + } + + // Assign + for (auto const &css: chain_id_pairs_) { + auto entity_seqres_map_it = entity_seqres_map.find(css.second); + if (entity_seqres_map_it != entity_seqres_map.end()) { + if (entity_seqres_map_it->second == "") { + // We hit this if there was an _entity_poly category but no _entity_poly_seq + LOG_WARNING("No SEQRES found for chain '" + << css.first << "'. 
Most likely the entity_poly_seq " + << "category was missing from the input file."); + } else { + seqres_list.AddSequence(seq::CreateSequence(css.first.GetName(), + entity_seqres_map_it->second)); + } + } + // else: either non polymer chain, or no entity_poly was available (this + // triggered a warning before) + } + return seqres_list; +} + }} diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index 28798a49f..cc61e9ca3 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -89,16 +89,6 @@ public: /// \param restrict_chains chain name void SetRestrictChains(const String& restrict_chains); - /// \brief Toggle reading of canonical sequence residues - /// (entity_poly.pdbx_seq_one_letter_code_can instead of - /// entity_poly.pdbx_seq_one_letter_code). This flag is exclusive. - /// - /// \param flag True for reading canonical sequences. - void SetReadCanonicalSeqRes(bool flag) - { - seqres_can_ = flag; - } - const String& GetRestrictChains() const { return restrict_chains_; @@ -141,25 +131,7 @@ public: /// \brief Return sequences /// /// \return List of sequences - seq::SequenceList GetSeqRes() const { - return seqres_; - } - - /// \brief Toggle reading of SEQRES - /// - /// \param flag True enables, False disables reading SEQRES - void SetReadSeqRes(bool flag) - { - read_seqres_ = flag; - } - - /// \brief Check if reading of SEQRES is enabled - /// - /// \return True if reading of SEQRES is enabled - bool GetReadSeqRes() const - { - return read_seqres_; - } + seq::SequenceList GetSeqRes() const; /// \brief Get additional information of the mmCIF file. /// @@ -232,15 +204,7 @@ protected: void ParseCitation(const std::vector<StringRef>& columns); const MMCifInfoStructRefs& GetStructRefs() const { return struct_refs_; } - /// \brief convert the seqres data item to canonical form. - /// - /// The seqres sequence lists non-standard residues in parenthesis. 
For - /// proper handling of our sequence classes, these need to be converted to - /// one-letter-codes. Ideally, we would use the canonical SEQRES. This is - /// not possible, however, since the PDB assigns multiple one letter codes - /// to some of the residues. To be consistent, we have to do the conversion - /// on our own. - String ConvertSEQRES(const String& seqres, conop::CompoundLibBasePtr compound_lib); + /// \brief Fetch mmCIF citation_author information /// /// \param columns data row @@ -717,7 +681,6 @@ private: mol::EntityHandle& ent_handle_; String restrict_chains_; bool auth_chain_id_; ///< use chain IDs given by authors rather than pdb - bool seqres_can_; ///< read canonical 1-letter residues? mol::ChainHandle curr_chain_; mol::ResidueHandle curr_residue_; int chain_count_; @@ -731,8 +694,6 @@ private: std::vector<std::pair<mol::ChainHandle, String> > chain_id_pairs_; ///< chain and label_entity_id MMCifEntityDescMap entity_desc_map_; ///< stores entity items - seq::SequenceList seqres_; - bool read_seqres_; MMCifInfo info_; ///< info container MMCifCitationAuthorMap authors_map_; MMCifBioUAssemblyVector bu_assemblies_; diff --git a/modules/io/src/mol/mmcif_str.cc b/modules/io/src/mol/mmcif_str.cc index 6fde265e4..604af0d2d 100644 --- a/modules/io/src/mol/mmcif_str.cc +++ b/modules/io/src/mol/mmcif_str.cc @@ -50,7 +50,6 @@ MMCifStringToEntity(const String& mmcif, const IOProfile& profile, bool process) std::stringstream stream(mmcif); mol::EntityHandle ent = mol::CreateEntity(); MMCifReader reader(stream, ent, profile); - reader.SetReadSeqRes(true); reader.Parse(); if(profile.processor && process) { profile.processor->Process(ent); diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc index 6d81b4603..4670b815a 100644 --- a/modules/io/tests/test_mmcif_reader.cc +++ b/modules/io/tests/test_mmcif_reader.cc @@ -65,10 +65,7 @@ public: using MMCifReader::ParsePdbxEntityBranchLink; using MMCifReader::TryStoreIdx; using 
MMCifReader::SetRestrictChains; - using MMCifReader::SetReadSeqRes; - using MMCifReader::SetReadCanonicalSeqRes; using MMCifReader::ClearState; - using MMCifReader::ConvertSEQRES; using MMCifReader::GetInfo; using MMCifReader::DetermineSecStructType; using MMCifReader::MMCifSecStructElement; @@ -128,23 +125,6 @@ BOOST_AUTO_TEST_CASE(mmcif_trystoreidx) BOOST_CHECK_NO_THROW(tmmcif_p.TryStoreIdx(0, "bar", mmcif_h)); } -BOOST_AUTO_TEST_CASE(mmcif_convert_seqres) -{ - conop::CompoundLibPtr compound_lib = SetDefaultCompoundLib(); - if (!compound_lib) { - std::cout << "WARNING: skipping mmcif_convert_seqres unit test. " - << "Compound library is required" << std::endl; - return; - } - - mol::EntityHandle eh=mol::CreateEntity(); - - TestMMCifReaderProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh); - BOOST_CHECK_EQUAL(tmmcif_p.ConvertSEQRES("A(MSE)Y", compound_lib), "AMY"); - BOOST_CHECK_THROW(tmmcif_p.ConvertSEQRES("A(MSEY", compound_lib), - IOException); -} - BOOST_AUTO_TEST_CASE(mmcif_onbeginloop) { mol::EntityHandle eh=mol::CreateEntity(); @@ -404,7 +384,6 @@ BOOST_AUTO_TEST_CASE(mmcif_entity_poly_tests) mol::EntityHandle eh = mol::CreateEntity(); MMCifReader mmcif_p("testfiles/mmcif/atom_site.mmcif", eh, profile); - mmcif_p.SetReadSeqRes(true); mmcif_p.Parse(); seq::SequenceList seqres = mmcif_p.GetSeqRes(); @@ -414,7 +393,6 @@ BOOST_AUTO_TEST_CASE(mmcif_entity_poly_tests) BOOST_TEST_MESSAGE(" testing type recognition..."); { TestMMCifReaderProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh); - tmmcif_p.SetReadSeqRes(false); std::vector<StringRef> columns; // create corresponding entity entry @@ -493,46 +471,7 @@ columns.push_back(StringRef("polydeoxyribonucleotide/polyribonucleotide hybrid", columns.push_back(StringRef("1", 1)); columns.push_back(StringRef("other", 5)); columns.push_back(StringRef("ABRND", 5)); - tmmcif_p.SetReadSeqRes(true); - tmmcif_p.SetReadCanonicalSeqRes(true); - BOOST_CHECK_THROW(tmmcif_p.ParseEntityPoly(columns), IOException); - 
tmmcif_p.SetReadCanonicalSeqRes(false); BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns)); - BOOST_CHECK_THROW(tmmcif_p.ParseEntityPoly(columns), IOException); - } - BOOST_TEST_MESSAGE(" done."); - BOOST_TEST_MESSAGE(" testing pdbx_seq_one_letter_code_can " - "reading..."); - { - TestMMCifReaderProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh); - std::vector<StringRef> columns; - - tmmcif_h.Clear(); - tmmcif_h.SetCategory(StringRef("entity", 6)); - tmmcif_h.Add(StringRef("id", 2)); - tmmcif_h.Add(StringRef("type", 4)); - tmmcif_p.OnBeginLoop(tmmcif_h); - columns.push_back(StringRef("1", 1)); - columns.push_back(StringRef("polymer", 7)); - tmmcif_p.ParseEntity(columns); - columns.pop_back(); - columns.pop_back(); - - tmmcif_h.Clear(); - tmmcif_h.SetCategory(StringRef("entity_poly", 11)); - tmmcif_h.Add(StringRef("entity_id", 9)); - tmmcif_h.Add(StringRef("type", 4)); - tmmcif_h.Add(StringRef("pdbx_seq_one_letter_code_can", 28)); - tmmcif_p.OnBeginLoop(tmmcif_h); - tmmcif_p.SetReadCanonicalSeqRes(false); - columns.push_back(StringRef("1", 1)); - columns.push_back(StringRef("other", 5)); - columns.push_back(StringRef("ABRND", 5)); - tmmcif_p.SetReadSeqRes(true); - BOOST_CHECK_THROW(tmmcif_p.ParseEntityPoly(columns), IOException); - tmmcif_p.SetReadCanonicalSeqRes(true); - BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns)); - BOOST_CHECK_THROW(tmmcif_p.ParseEntityPoly(columns), IOException); } BOOST_TEST_MESSAGE(" done."); @@ -1323,9 +1262,6 @@ BOOST_AUTO_TEST_CASE(mmcif_test_chain_mappings) std::ifstream s("testfiles/mmcif/atom_site.mmcif"); IOProfile profile; MMCifReader mmcif_p(s, eh, profile); - if (compound_lib_available) { - mmcif_p.SetReadSeqRes(true); - } BOOST_REQUIRE_NO_THROW(mmcif_p.Parse()); const MMCifInfo& info = mmcif_p.GetInfo(); diff --git a/modules/io/tests/testfiles/mmcif/atom_site.mmcif b/modules/io/tests/testfiles/mmcif/atom_site.mmcif index 9bd8159d6..e01cf812a 100644 --- a/modules/io/tests/testfiles/mmcif/atom_site.mmcif +++ 
b/modules/io/tests/testfiles/mmcif/atom_site.mmcif @@ -26,6 +26,15 @@ _entity_poly.nstd_linkage no _entity_poly.nstd_monomer no _entity_poly.pdbx_seq_one_letter_code 'VTI' +loop_ +_entity_poly_seq.entity_id +_entity_poly_seq.num +_entity_poly_seq.mon_id +_entity_poly_seq.hetero +1 1 VAL n +1 2 THR n +1 3 ILE n + loop_ _citation.id _citation.abstract_id_CAS diff --git a/modules/seq/alg/tests/testfiles/align_to_seqres.mmcif b/modules/seq/alg/tests/testfiles/align_to_seqres.mmcif index 177b53a64..9fce63c4b 100644 --- a/modules/seq/alg/tests/testfiles/align_to_seqres.mmcif +++ b/modules/seq/alg/tests/testfiles/align_to_seqres.mmcif @@ -20,7 +20,87 @@ _entity_poly.nstd_monomer no _entity_poly.pdbx_seq_one_letter_code MYTNSDFVVIKALEDGVNVIGLTRGADTRFHHSEKLDKGEVLIAQFTEHTSAIKVRGKAYIQTRHGVIESEGKK _entity_poly.pdbx_seq_one_letter_code_can MYTNSDFVVIKALEDGVNVIGLTRGADTRFHHSEKLDKGEVLIAQFTEHTSAIKVRGKAYIQTRHGVIESEGKK _entity_poly.pdbx_strand_id A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V -# +# +loop_ +_entity_poly_seq.entity_id +_entity_poly_seq.num +_entity_poly_seq.mon_id +_entity_poly_seq.hetero +1 1 MET n +1 2 TYR n +1 3 THR n +1 4 ASN n +1 5 SER n +1 6 ASP n +1 7 PHE n +1 8 VAL n +1 9 VAL n +1 10 ILE n +1 11 LYS n +1 12 ALA n +1 13 LEU n +1 14 GLU n +1 15 ASP n +1 16 GLY n +1 17 VAL n +1 18 ASN n +1 19 VAL n +1 20 ILE n +1 21 GLY n +1 22 LEU n +1 23 THR n +1 24 ARG n +1 25 GLY n +1 26 ALA n +1 27 ASP n +1 28 THR n +1 29 ARG n +1 30 PHE n +1 31 HIS n +1 32 HIS n +1 33 SER n +1 34 GLU n +1 35 LYS n +1 36 LEU n +1 37 ASP n +1 38 LYS n +1 39 GLY n +1 40 GLU n +1 41 VAL n +1 42 LEU n +1 43 ILE n +1 44 ALA n +1 45 GLN n +1 46 PHE n +1 47 THR n +1 48 GLU n +1 49 HIS n +1 50 THR n +1 51 SER n +1 52 ALA n +1 53 ILE n +1 54 LYS n +1 55 VAL n +1 56 ARG n +1 57 GLY n +1 58 LYS n +1 59 ALA n +1 60 TYR n +1 61 ILE n +1 62 GLN n +1 63 THR n +1 64 ARG n +1 65 HIS n +1 66 GLY n +1 67 VAL n +1 68 ILE n +1 69 GLU n +1 70 SER n +1 71 GLU n +1 72 GLY n +1 73 LYS n +1 74 LYS n +# loop_ 
_atom_site.group_PDB _atom_site.id diff --git a/modules/seq/alg/tests/testfiles/validate_segres_aln_breakage.mmcif b/modules/seq/alg/tests/testfiles/validate_segres_aln_breakage.mmcif index 0d66b8fa2..57d45b9df 100644 --- a/modules/seq/alg/tests/testfiles/validate_segres_aln_breakage.mmcif +++ b/modules/seq/alg/tests/testfiles/validate_segres_aln_breakage.mmcif @@ -19,7 +19,86 @@ _entity_poly.nstd_linkage no _entity_poly.nstd_monomer no _entity_poly.pdbx_seq_one_letter_code MYTNSDFVVIKALEDGVNVIGLTRGADTRFHHSEKLDKGEVLIAQFTEHTSAIKVRGKAYIQTRHGVIESEKK _entity_poly.pdbx_seq_one_letter_code_can MYTNSDFVVIKALEDGVNVIGLTRGADTRFHHSEKLDKGEVLIAQFTEHTSAIKVRGKAYIQTRHGVIESEKK -_entity_poly.pdbx_strand_id A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V +_entity_poly.pdbx_strand_id A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V +# +loop_ +_entity_poly_seq.entity_id +_entity_poly_seq.num +_entity_poly_seq.mon_id +_entity_poly_seq.hetero +1 1 MET n +1 2 TYR n +1 3 THR n +1 4 ASN n +1 5 SER n +1 6 ASP n +1 7 PHE n +1 8 VAL n +1 9 VAL n +1 10 ILE n +1 11 LYS n +1 12 ALA n +1 13 LEU n +1 14 GLU n +1 15 ASP n +1 16 GLY n +1 17 VAL n +1 18 ASN n +1 19 VAL n +1 20 ILE n +1 21 GLY n +1 22 LEU n +1 23 THR n +1 24 ARG n +1 25 GLY n +1 26 ALA n +1 27 ASP n +1 28 THR n +1 29 ARG n +1 30 PHE n +1 31 HIS n +1 32 HIS n +1 33 SER n +1 34 GLU n +1 35 LYS n +1 36 LEU n +1 37 ASP n +1 38 LYS n +1 39 GLY n +1 40 GLU n +1 41 VAL n +1 42 LEU n +1 43 ILE n +1 44 ALA n +1 45 GLN n +1 46 PHE n +1 47 THR n +1 48 GLU n +1 49 HIS n +1 50 THR n +1 51 SER n +1 52 ALA n +1 53 ILE n +1 54 LYS n +1 55 VAL n +1 56 ARG n +1 57 GLY n +1 58 LYS n +1 59 ALA n +1 60 TYR n +1 61 ILE n +1 62 GLN n +1 63 THR n +1 64 ARG n +1 65 HIS n +1 66 GLY n +1 67 VAL n +1 68 ILE n +1 69 GLU n +1 70 SER n +1 71 GLU n +1 72 LYS n +1 73 LYS n # loop_ _atom_site.group_PDB diff --git a/modules/seq/alg/tests/testfiles/validate_seqres_aln_connected.mmcif b/modules/seq/alg/tests/testfiles/validate_seqres_aln_connected.mmcif index 2e76cf323..fb3a0807f 
100644 --- a/modules/seq/alg/tests/testfiles/validate_seqres_aln_connected.mmcif +++ b/modules/seq/alg/tests/testfiles/validate_seqres_aln_connected.mmcif @@ -19,7 +19,88 @@ _entity_poly.nstd_linkage no _entity_poly.nstd_monomer no _entity_poly.pdbx_seq_one_letter_code MYTNSDFVVIKALEDGVNVIGLTRGADTRFHHSEKLDKGEVLIAQFTEHTSAIKVRGKAYIQTRHGVIESEGGKK _entity_poly.pdbx_seq_one_letter_code_can MYTNSDFVVIKALEDGVNVIGLTRGADTRFHHSEKLDKGEVLIAQFTEHTSAIKVRGKAYIQTRHGVIESEGGKK -_entity_poly.pdbx_strand_id A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V +_entity_poly.pdbx_strand_id A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V +# +loop_ +_entity_poly_seq.entity_id +_entity_poly_seq.num +_entity_poly_seq.mon_id +_entity_poly_seq.hetero +1 1 MET n +1 2 TYR n +1 3 THR n +1 4 ASN n +1 5 SER n +1 6 ASP n +1 7 PHE n +1 8 VAL n +1 9 VAL n +1 10 ILE n +1 11 LYS n +1 12 ALA n +1 13 LEU n +1 14 GLU n +1 15 ASP n +1 16 GLY n +1 17 VAL n +1 18 ASN n +1 19 VAL n +1 20 ILE n +1 21 GLY n +1 22 LEU n +1 23 THR n +1 24 ARG n +1 25 GLY n +1 26 ALA n +1 27 ASP n +1 28 THR n +1 29 ARG n +1 30 PHE n +1 31 HIS n +1 32 HIS n +1 33 SER n +1 34 GLU n +1 35 LYS n +1 36 LEU n +1 37 ASP n +1 38 LYS n +1 39 GLY n +1 40 GLU n +1 41 VAL n +1 42 LEU n +1 43 ILE n +1 44 ALA n +1 45 GLN n +1 46 PHE n +1 47 THR n +1 48 GLU n +1 49 HIS n +1 50 THR n +1 51 SER n +1 52 ALA n +1 53 ILE n +1 54 LYS n +1 55 VAL n +1 56 ARG n +1 57 GLY n +1 58 LYS n +1 59 ALA n +1 60 TYR n +1 61 ILE n +1 62 GLN n +1 63 THR n +1 64 ARG n +1 65 HIS n +1 66 GLY n +1 67 VAL n +1 68 ILE n +1 69 GLU n +1 70 SER n +1 71 GLU n +1 72 GLY n +1 73 GLY n +1 74 LYS n +1 75 LYS n # loop_ _atom_site.group_PDB -- GitLab