From cd8ba0804d3a987f780db9f989bac3ab5ed4930c Mon Sep 17 00:00:00 2001 From: B13nch3n <stefan.bienert@me.com> Date: Tue, 21 Jul 2020 15:15:46 +0200 Subject: [PATCH] Allow free positioning of entity category in mmCIF files --- modules/io/src/mol/mmcif_reader.cc | 69 ++++++++++++--------------- modules/io/src/mol/mmcif_reader.hh | 6 +++ modules/io/tests/test_mmcif_reader.cc | 36 +++----------- 3 files changed, 43 insertions(+), 68 deletions(-) diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 1eb122070..e2f67b119 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -660,47 +660,50 @@ void MMCifReader::ParseAndAddAtom(const std::vector<StringRef>& columns) columns[indices_[GROUP_PDB]][0]=='H'); } +MMCifReader::MMCifEntityDescMap::iterator MMCifReader::GetEntityDescMapIterator( + const String& entity_id) +{ + MMCifEntityDescMap::iterator edm_it = entity_desc_map_.find(entity_id); + // if the entity ID is not already stored, insert it with empty values + if (edm_it == entity_desc_map_.end()) { + MMCifEntityDesc desc = {.type=mol::CHAINTYPE_N_CHAINTYPES, + .details="", + .seqres=""}; + edm_it = entity_desc_map_.insert(entity_desc_map_.begin(), + MMCifEntityDescMap::value_type(entity_id, + desc)); + } + return edm_it; +} + void MMCifReader::ParseEntity(const std::vector<StringRef>& columns) { - bool store = false; // is it worth storing this record? 
- MMCifEntityDesc desc; + MMCifEntityDescMap::iterator edm_it = + GetEntityDescMapIterator(columns[indices_[E_ID]].str()); // type if (indices_[E_TYPE] != -1) { - desc.type = mol::ChainTypeFromString(columns[indices_[E_TYPE]]); - store = true; + // only use the entity type if no other is set, entity_poly type is + // more precise, so if that was set before just leave it in + if (edm_it->second.type == mol::CHAINTYPE_N_CHAINTYPES) { + edm_it->second.type = mol::ChainTypeFromString(columns[indices_[E_TYPE]]); + } + } else { + // don't deal with entities without type + entity_desc_map_.erase(edm_it); + return; } // description if (indices_[PDBX_DESCRIPTION] != -1) { - desc.details = columns[indices_[PDBX_DESCRIPTION]].str(); - } else { - desc.details = ""; - } - - if (store) { - desc.seqres = ""; - entity_desc_map_.insert( - MMCifEntityDescMap::value_type(columns[indices_[E_ID]].str(), - desc) - ); + edm_it->second.details = columns[indices_[PDBX_DESCRIPTION]].str(); } } void MMCifReader::ParseEntityPoly(const std::vector<StringRef>& columns) { - // we assume that the entity cat. ALWAYS comes before the entity_poly cat. - // search entity MMCifEntityDescMap::iterator edm_it = - entity_desc_map_.find(columns[indices_[ENTITY_ID]].str()); - - if (edm_it == entity_desc_map_.end()) { - throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, - "'entity_poly' category defined before 'entity' for id '" + - columns[indices_[ENTITY_ID]].str() + - "' or missing.", - this->GetCurrentLinenum())); - } + GetEntityDescMapIterator(columns[indices_[ENTITY_ID]].str()); // store type if (indices_[EP_TYPE] != -1) { @@ -1713,19 +1716,9 @@ void MMCifReader::ParseStructRefSeqDif(const std::vector<StringRef>& columns) void MMCifReader::ParsePdbxEntityBranch(const std::vector<StringRef>& columns) { - // we assume that the entity cat. ALWAYS comes before the pdbx_entity_branch - // cat. 
- // search entity + // get entity/ description entry MMCifEntityDescMap::iterator edm_it = - entity_desc_map_.find(columns[indices_[BR_ENTITY_ID]].str()); - - if (edm_it == entity_desc_map_.end()) { - throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, - "'pdbx_entity_branch' category defined before 'entity' for id '" + - columns[indices_[BR_ENTITY_ID]].str() + - "' or missing.", - this->GetCurrentLinenum())); - } + GetEntityDescMapIterator(columns[indices_[BR_ENTITY_ID]].str()); // store type if (indices_[BR_ENTITY_TYPE] != -1) { diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index a9339c6ac..03ceb32c9 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -629,6 +629,10 @@ private: } MMCifEntityDesc; typedef std::map<String, MMCifEntityDesc> MMCifEntityDescMap; + /// \brief Get an iterator for MMCifEntityDescMap by finding an element or + /// inserting a new one into the map. + MMCifEntityDescMap::iterator GetEntityDescMapIterator(const String&); + /// \struct assembly information typedef struct { String biounit_id; ///< identifier for the bu @@ -739,3 +743,5 @@ DLLEXPORT_OST_IO String OSTBondOrderToMMCifValueOrder( }} #endif + +// LocalWords: MMCifEntityDescMap diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc index 65673c858..b6fa90f1f 100644 --- a/modules/io/tests/test_mmcif_reader.cc +++ b/modules/io/tests/test_mmcif_reader.cc @@ -311,9 +311,13 @@ BOOST_AUTO_TEST_CASE(mmcif_unknown_entity_type) columns.push_back(StringRef("polymer", 7)); BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntity(columns)); columns.pop_back(); + columns.pop_back(); + columns.push_back(StringRef("2", 1)); columns.push_back(StringRef("non-polymer", 11)); BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntity(columns)); columns.pop_back(); + columns.pop_back(); + columns.push_back(StringRef("3", 1)); columns.push_back(StringRef("water", 5)); 
BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntity(columns)); BOOST_TEST_MESSAGE(" done."); @@ -321,6 +325,8 @@ BOOST_AUTO_TEST_CASE(mmcif_unknown_entity_type) // negative BOOST_TEST_MESSAGE(" unknown type..."); columns.pop_back(); + columns.pop_back(); + columns.push_back(StringRef("4", 1)); columns.push_back(StringRef("foo", 3)); BOOST_CHECK_THROW(tmmcif_p.ParseEntity(columns), Error); BOOST_TEST_MESSAGE(" done."); @@ -404,20 +410,6 @@ BOOST_AUTO_TEST_CASE(mmcif_entity_poly_tests) seq::SequenceHandle curr = seqres.FindSequence("A"); BOOST_CHECK(curr.GetString() == "VTI"); - BOOST_TEST_MESSAGE(" testing missing corresponding entity entry..."); - { - mol::EntityHandle eh = mol::CreateEntity(); - std::vector<StringRef> columns; - TestMMCifReaderProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh); - - tmmcif_h.SetCategory(StringRef("entity_poly", 11)); - tmmcif_h.Add(StringRef("entity_id", 9)); - tmmcif_p.OnBeginLoop(tmmcif_h); - - columns.push_back(StringRef("1", 1)); - BOOST_CHECK_THROW(tmmcif_p.ParseEntityPoly(columns), IOException); - } - BOOST_TEST_MESSAGE(" done."); BOOST_TEST_MESSAGE(" testing type recognition..."); { TestMMCifReaderProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh); @@ -1435,22 +1427,6 @@ BOOST_AUTO_TEST_CASE(mmcif_pdbx_entity_branch_tests) mmcif_p.Parse(); - BOOST_TEST_MESSAGE(" testing missing corresponding entity entry..."); - { - mol::EntityHandle eh = mol::CreateEntity(); - std::vector<StringRef> columns; - TestMMCifReaderProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh); - - tmmcif_h.SetCategory(StringRef("pdbx_entity_branch", 18)); - tmmcif_h.Add(StringRef("entity_id", 9)); - tmmcif_h.Add(StringRef("type", 4)); - tmmcif_p.OnBeginLoop(tmmcif_h); - columns.push_back(StringRef("1", 1)); - columns.push_back(StringRef("oligosaccharide", 15)); - - BOOST_CHECK_THROW(tmmcif_p.ParsePdbxEntityBranch(columns), IOException); - } - BOOST_TEST_MESSAGE(" done."); BOOST_TEST_MESSAGE(" testing chain type recognition..."); { 
TestMMCifReaderProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh); -- GitLab