From cd7013478638b9330113b0897dbc6373dc228c7a Mon Sep 17 00:00:00 2001 From: Gabriel Studer <gabriel.studer@unibas.ch> Date: Tue, 30 May 2023 12:17:32 +0200 Subject: [PATCH] read SEQRES by default in mmcif reader Performance-wise, this doesn't really make a difference, but properly setting the IsLigand property depends on it. --- modules/io/pymod/__init__.py | 6 +++--- modules/io/src/mol/mmcif_reader.cc | 20 +++++++++++++------- modules/io/src/mol/mmcif_reader.hh | 2 +- modules/io/tests/test_mmcif_reader.cc | 1 + 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index 0f8131382..ca619a156 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -393,14 +393,15 @@ def LoadMMCIF(filename, fault_tolerant=None, calpha_only=None, interpreted as the pdb id. :type remote: :class:`bool` - :param seqres: Whether to read SEQRES records. If True, a + :param seqres: Whether to return SEQRES records. If True, a :class:`~ost.seq.SequenceList` object is returned as the second item. The sequences in the list are named according to the mmCIF chain name. This feature requires a default :class:`compound library <ost.conop.CompoundLib>` to be defined and accessible via - :func:`~ost.conop.GetDefaultLib` or an empty list is returned. + :func:`~ost.conop.GetDefaultLib`. One letter codes of non + standard compounds are set to X otherwise. :type seqres: :class:`bool` :param info: Whether to return an info container with the other output. @@ -434,7 +435,6 @@ def LoadMMCIF(filename, fault_tolerant=None, calpha_only=None, try: ent = mol.CreateEntity() reader = MMCifReader(filename, ent, prof) - reader.read_seqres = seqres # NOTE: to speed up things, we could introduce a restrict_chains parameter # similar to the one in LoadPDB. 
Here, it would have to be a list/set diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index e4ab24090..7f7966ce1 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -24,9 +24,14 @@ #include <ost/dyn_cast.hh> #include <ost/mol/xcs_editor.hh> #include <ost/conop/conop.hh> +#include <ost/conop/minimal_compound_lib.hh> #include <ost/io/mol/mmcif_reader.hh> + + +#include <iostream> + namespace ost { namespace io { @@ -65,7 +70,7 @@ void MMCifReader::Init() curr_chain_ = mol::ChainHandle(); curr_residue_ = mol::ResidueHandle(); seqres_ = seq::CreateSequenceList(); - read_seqres_ = false; + read_seqres_ = true; warned_rule_based_ = false; info_ = MMCifInfo(); } @@ -742,15 +747,16 @@ void MMCifReader::ParseEntityPoly(const std::vector<StringRef>& columns) } else if (indices_[PDBX_SEQ_ONE_LETTER_CODE] != -1) { seqres=columns[indices_[PDBX_SEQ_ONE_LETTER_CODE]]; - conop::CompoundLibPtr comp_lib=conop::Conopology::Instance() - .GetDefaultLib(); + conop::CompoundLibBasePtr comp_lib=conop::Conopology::Instance() + .GetDefaultLib(); if (!comp_lib) { if (!warned_rule_based_) { - LOG_WARNING("SEQRES import requires a compound library. " - "Ignoring SEQRES records"); + LOG_WARNING("SEQRES import requires a valid compound library to " + "handle non standard compounds. 
Their One letter " + "codes will be set to X."); } warned_rule_based_=true; - return; + comp_lib = conop::CompoundLibBasePtr(new ost::conop::MinimalCompoundLib); } edm_it->second.seqres = this->ConvertSEQRES(seqres.str_no_whitespace(), comp_lib); @@ -763,7 +769,7 @@ void MMCifReader::ParseEntityPoly(const std::vector<StringRef>& columns) } String MMCifReader::ConvertSEQRES(const String& seqres, - conop::CompoundLibPtr comp_lib) + conop::CompoundLibBasePtr comp_lib) { String can_seqres; for (String::const_iterator i=seqres.begin(), e=seqres.end(); i!=e; ++i) { diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index 3d10f03d4..1f21c3928 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -240,7 +240,7 @@ protected: /// not possible, however, since the PDB assigns multiple one letter codes /// to some of the residues. To be consistent, we have to do the conversion /// on our own. - String ConvertSEQRES(const String& seqres, conop::CompoundLibPtr compound_lib); + String ConvertSEQRES(const String& seqres, conop::CompoundLibBasePtr compound_lib); /// \brief Fetch mmCIF citation_author information /// /// \param columns data row diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc index bee89e0e2..e03eb0073 100644 --- a/modules/io/tests/test_mmcif_reader.cc +++ b/modules/io/tests/test_mmcif_reader.cc @@ -414,6 +414,7 @@ BOOST_AUTO_TEST_CASE(mmcif_entity_poly_tests) BOOST_TEST_MESSAGE(" testing type recognition..."); { TestMMCifReaderProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh); + tmmcif_p.SetReadSeqRes(false); std::vector<StringRef> columns; // create corresponding entity entry -- GitLab