From 080b744e82eb5869e7b694f6e072a0930c8f9493 Mon Sep 17 00:00:00 2001
From: Stefan Bienert <stefan.bienert@unibas.ch>
Date: Tue, 2 Aug 2011 18:47:33 +0200
Subject: [PATCH] MMCifParser is now reading entity_poly entries (more
information on chains)
---
modules/io/src/mol/mmcif_reader.cc | 66 ++++++++++++-
modules/io/src/mol/mmcif_reader.hh | 12 +++
modules/io/tests/test_mmcif_reader.cc | 93 ++++++++++++++++++-
.../io/tests/testfiles/mmcif/atom_site.mmcif | 5 +
4 files changed, 171 insertions(+), 5 deletions(-)
diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc
index cf5c49016..dd43aa093 100644
--- a/modules/io/src/mol/mmcif_reader.cc
+++ b/modules/io/src/mol/mmcif_reader.cc
@@ -157,9 +157,15 @@ bool MMCifParser::OnBeginLoop(const StarLoopDesc& header)
indices_[E_TYPE] = header.GetIndex("type");
indices_[DETAILS] = header.GetIndex("details");
return true;
- }
- /*else if (header.GetCategory()=="entity_poly") {
- } else if (header.GetCategory()=="pdbx_poly_seq_scheme") {
+ } else if (header.GetCategory()=="entity_poly") {
+ category_ = ENTITY_POLY;
+ category_counts_[category_]++;
+ // mandatory
+ this->TryStoreIdx(ENTITY_ID, "entity_id", header);
+ // optional
+ indices_[EP_TYPE] = header.GetIndex("type");
+ return true;
+ } /*else if (header.GetCategory()=="pdbx_poly_seq_scheme") {
} else if (header.GetCategory()=="pdbx_struct_assembly") {
} else if (header.GetCategory()=="struct_conf") {
}*/
@@ -467,6 +473,55 @@ void MMCifParser::ParseEntity(const std::vector<StringRef>& columns)
}
}
+void MMCifParser::ParseEntityPoly(const std::vector<StringRef>& columns)
+{
+ // Precondition: the 'entity' category has been parsed BEFORE 'entity_poly',
+ // so the entity referenced by this row is looked up in entity_desc_map_.
+ MMCifEntityDescMap::iterator edm_it =
+ entity_desc_map_.find(columns[indices_[ENTITY_ID]].str());
+
+ // entity known: refine its chain type from the optional 'type' column
+ if (edm_it != entity_desc_map_.end()) {
+ if (indices_[EP_TYPE] != -1) {
+ if(StringRef("polypeptide(D)", 14) == columns[indices_[EP_TYPE]]) {
+ edm_it->second.type = CHAINTYPE_POLY_PEPTIDE_D;
+ } else if(StringRef("polypeptide(L)", 14) == columns[indices_[EP_TYPE]]) {
+ edm_it->second.type = CHAINTYPE_POLY_PEPTIDE_L;
+ } else if(StringRef("polydeoxyribonucleotide", 23) ==
+ columns[indices_[EP_TYPE]]) {
+ edm_it->second.type = CHAINTYPE_POLY_DN;
+ } else if(StringRef("polyribonucleotide", 18) ==
+ columns[indices_[EP_TYPE]]) {
+ edm_it->second.type = CHAINTYPE_POLY_RN;
+ } else if(StringRef("polysaccharide(D)", 17) ==
+ columns[indices_[EP_TYPE]]) {
+ edm_it->second.type = CHAINTYPE_POLY_SAC_D;
+ } else if(StringRef("polysaccharide(L)", 17) ==
+ columns[indices_[EP_TYPE]]) {
+ edm_it->second.type = CHAINTYPE_POLY_SAC_L;
+ } else if(StringRef("polydeoxyribonucleotide/polyribonucleotide hybrid",
+ 49) == columns[indices_[EP_TYPE]]) {
+ edm_it->second.type = CHAINTYPE_POLY_DN_RN;
+ } else if(StringRef("other",
+ 5) == columns[indices_[EP_TYPE]]) {
+ edm_it->second.type = CHAINTYPE_UNKNOWN;
+ } else {
+ throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
+ "Unrecognised polymer type '" +
+ columns[indices_[EP_TYPE]].str() +
+ "' found.",
+ this->GetCurrentLinenum()));
+ }
+ }
+ } else {
+ throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
+ "'entity_poly' category defined before 'entity' for id '" +
+ columns[indices_[ENTITY_ID]].str() +
+ "' or missing.",
+ this->GetCurrentLinenum()));
+ }
+}
+
void MMCifParser::OnDataRow(const StarLoopDesc& header,
const std::vector<StringRef>& columns)
{
@@ -478,6 +533,11 @@ void MMCifParser::OnDataRow(const StarLoopDesc& header,
case ENTITY:
LOG_TRACE("processing entity entry");
this->ParseEntity(columns);
+ break;
+ case ENTITY_POLY:
+ LOG_TRACE("processing entity_poly entry");
+ this->ParseEntityPoly(columns);
+ break;
default:
return;
}
diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh
index f720fadc3..b63e5c51d 100644
--- a/modules/io/src/mol/mmcif_reader.hh
+++ b/modules/io/src/mol/mmcif_reader.hh
@@ -155,6 +155,11 @@ public:
/// \param columns data row
void ParseEntity(const std::vector<StringRef>& columns);
+ /// \brief Fetch MMCif entity_poly information: looks up the entity named by
+ /// the row and stores its polymer type; throws IOException on failure
+ /// \param columns data row
+ void ParseEntityPoly(const std::vector<StringRef>& columns);
+
private:
/// \enum magic numbers of this class
typedef enum {
@@ -191,10 +196,17 @@ private:
DETAILS ///< special aspects of the entity
} EntityItems;
+ /// \enum items of the entity_poly category (used as keys into indices_)
+ typedef enum {
+ ENTITY_ID, ///< entity_poly.entity_id, pointer to entity.id (mandatory)
+ EP_TYPE ///< entity_poly.type, type of polymer (optional)
+ } EntityPolyItems;
+
/// \enum categories of the mmcif format
typedef enum {
ATOM_SITE,
ENTITY,
+ ENTITY_POLY,
DONT_KNOW
} MMCifCategory;
diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc
index 45166d2e2..7134e2f9e 100644
--- a/modules/io/tests/test_mmcif_reader.cc
+++ b/modules/io/tests/test_mmcif_reader.cc
@@ -49,6 +49,7 @@ public:
using MMCifParser::ParseAtomIdent;
using MMCifParser::ParseAndAddAtom;
using MMCifParser::ParseEntity;
+ using MMCifParser::ParseEntityPoly;
using MMCifParser::TryStoreIdx;
};
@@ -276,10 +277,10 @@ BOOST_AUTO_TEST_CASE(mmcif_entity_tests)
mmcif_p.Parse();
ch = eh.FindChain("A");
BOOST_CHECK(ch.IsValid());
- BOOST_CHECK(ch.GetType() == CHAINTYPE_POLY);
+ BOOST_CHECK(ch.GetType() == CHAINTYPE_POLY_PEPTIDE_L);
ch = eh.FindChain("C");
BOOST_CHECK(ch.IsValid());
- BOOST_CHECK(ch.GetType() == CHAINTYPE_POLY);
+ BOOST_CHECK(ch.GetType() == CHAINTYPE_POLY_PEPTIDE_L);
ch = eh.FindChain("O");
BOOST_CHECK(ch.IsValid());
BOOST_CHECK(ch.GetType() == CHAINTYPE_WATER);
@@ -315,6 +316,94 @@ BOOST_AUTO_TEST_CASE(mmcif_entity_tests)
BOOST_MESSAGE(" done.");
}
+BOOST_AUTO_TEST_CASE(mmcif_entity_poly_tests)
+{
+ BOOST_MESSAGE(" Running mmcif_entity_poly_tests...");
+ mol::ChainHandle ch;
+ IOProfile profile;
+ StarLoopDesc tmmcif_h;
+ // smoke test: the complete test file, which contains an entity_poly
+ // category, has to be parsed without raising an exception
+ mol::EntityHandle eh = mol::CreateEntity();
+ MMCifParser mmcif_p("testfiles/mmcif/atom_site.mmcif", eh, profile);
+
+ mmcif_p.Parse();
+
+
+ BOOST_MESSAGE(" testing missing corresponding entity entry...");
+ {
+ mol::EntityHandle eh = mol::CreateEntity();
+ std::vector<StringRef> columns;
+ TestMMCifParserProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh);
+
+ tmmcif_h.SetCategory(StringRef("entity_poly", 11));
+ tmmcif_h.Add(StringRef("entity_id", 9));
+ tmmcif_p.OnBeginLoop(tmmcif_h);
+
+ columns.push_back(StringRef("1", 1));
+ BOOST_CHECK_THROW(tmmcif_p.ParseEntityPoly(columns), IOException);
+ }
+ BOOST_MESSAGE(" done.");
+ BOOST_MESSAGE(" testing type recognition...");
+ {
+ TestMMCifParserProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh);
+ std::vector<StringRef> columns;
+
+ // create corresponding entity entry
+ tmmcif_h.Clear();
+ tmmcif_h.SetCategory(StringRef("entity", 6));
+ tmmcif_h.Add(StringRef("id", 2));
+ tmmcif_h.Add(StringRef("type", 4));
+ tmmcif_p.OnBeginLoop(tmmcif_h);
+ columns.push_back(StringRef("1", 1));
+ columns.push_back(StringRef("polymer", 7));
+ tmmcif_p.ParseEntity(columns);
+ columns.pop_back();
+ columns.pop_back();
+
+ // build dummy entity_poly header
+ tmmcif_h.Clear();
+ tmmcif_h.SetCategory(StringRef("entity_poly", 11));
+ tmmcif_h.Add(StringRef("entity_id", 9));
+ tmmcif_h.Add(StringRef("type", 4));
+ tmmcif_p.OnBeginLoop(tmmcif_h);
+
+ columns.push_back(StringRef("1", 1));
+ columns.push_back(StringRef("polypeptide(D)", 14));
+ BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+ columns.pop_back();
+ columns.push_back(StringRef("polypeptide(L)", 14));
+ BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+ columns.pop_back();
+ columns.push_back(StringRef("polydeoxyribonucleotide", 23));
+ BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+ columns.pop_back();
+ columns.push_back(StringRef("polyribonucleotide", 18));
+ BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+ columns.pop_back();
+ columns.push_back(StringRef("polysaccharide(D)", 17));
+ BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+ columns.pop_back();
+ columns.push_back(StringRef("polysaccharide(L)", 17));
+ BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+ columns.pop_back();
+columns.push_back(StringRef("polydeoxyribonucleotide/polyribonucleotide hybrid",
+ 49));
+ BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+ columns.pop_back();
+ columns.push_back(StringRef("other", 5));
+ BOOST_CHECK_NO_THROW(tmmcif_p.ParseEntityPoly(columns));
+ columns.pop_back();
+ // entity id "1" stays in columns[0] so that the type check is reached
+ columns.push_back(StringRef("badbadprion", 11));
+ BOOST_CHECK_THROW(tmmcif_p.ParseEntityPoly(columns), IOException);
+ columns.pop_back();
+ }
+ BOOST_MESSAGE(" done.");
+
+ BOOST_MESSAGE(" done.");
+}
+
BOOST_AUTO_TEST_CASE(mmcif_parseatomident)
{
BOOST_MESSAGE(" Running mmcif_parseatomident tests...");
diff --git a/modules/io/tests/testfiles/mmcif/atom_site.mmcif b/modules/io/tests/testfiles/mmcif/atom_site.mmcif
index cfee5fe46..5987ec461 100644
--- a/modules/io/tests/testfiles/mmcif/atom_site.mmcif
+++ b/modules/io/tests/testfiles/mmcif/atom_site.mmcif
@@ -15,6 +15,11 @@ _entity.details
;
5 water .
+_entity_poly.entity_id 1
+_entity_poly.type 'polypeptide(L)'
+_entity_poly.nstd_linkage no
+_entity_poly.nstd_monomer no
+
loop_
_atom_site.group_PDB
_atom_site.type_symbol
--
GitLab