diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index 8136df7aa4d6f2bf9643ee582b7a2c036af64755..b0c0459239dd14218118441ff90225a8446a7023 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -34,6 +34,12 @@ of the annotation available. Also available as :meth:`GetCitations`. + .. attribute:: biounits + + Stores a list of biounits (:class:`MMCifInfoBioUnit`). + + Also available as :meth:`GetBioUnits`. + .. attribute:: method Stores the experimental method used to create the structure. @@ -48,6 +54,14 @@ of the annotation available. Also available as :meth:`GetResolution`. May also be modified by :meth:`SetResolution`. + .. attribute:: operations + + Stores the operations needed to transform a crystal structure into a + biounit. + + Also available as :meth:`GetOperations`. May also be modified by + :meth:`AddOperation`. + .. method:: AddCitation(citation) Add a citation to the citation list of an info object. @@ -68,6 +82,17 @@ of the annotation available. See :attr:`citations` + .. method:: AddBioUnit(biounit) + + Add a biounit to the biounit list of an info object. + + :param biounit: Biounit to be added. + :type biounit: :class:`MMCifInfoBioUnit` + + .. method:: GetBioUnits() + + See :attr:`citations` + .. method:: SetMethod(method) See :attr:`method` @@ -84,6 +109,14 @@ of the annotation available. See :attr:`resolution` + .. method:: AddOperation(operation) + + See :attr:`operations` + + .. method:: GetOperations() + + See :attr:`operations` + .. class:: MMCifInfoCitation This stores citation information from an input file. @@ -265,3 +298,113 @@ of the annotation available. .. method:: SetAuthorList(list) See :attr:`authors` + + +.. class:: MMCifInfoTransOperation + + This stores operations needed to transform an + :class:`entity <ost.mol.EntityHandle>` into a biounit. + + .. attribute:: id + + A unique identifier. If not provided, resembles an empty string. + + Also available as :meth:`GetID`. May also be modified by + :meth:`SetID`. + + .. attribute:: type + + Describes the operation. If not provided, resembles an empty string. + + Also available as :meth:`GetType`. May also be modified by + :meth:`SetType`. + + .. attribute:: translation + + The translational vector. Also available as :meth:`GetVector`. May also be + + modified by :meth:`SetVector`. + + .. attribute:: rotation + + The rotational matrix. Also available as :meth:`GetMatrix`. May also be + + modified by :meth:`SetMatrix`. + + .. method:: GetID() + + See :attr:`id` + + .. method:: SetID(id) + + See :attr:`id` + + .. method:: GetType() + + See :attr:`type` + + .. method:: SetType(type) + + See :attr:`type` + + .. method:: GetVector() + + See :attr:`translation` + + .. method:: SetVector(x, y, z) + + See :attr:`translation` + + .. method:: GetMatrix() + + See :attr:`rotation` + + .. method:: SetMatrix(i00,i01, i02, i10,i11, i12, i20,i21, i22) + + See :attr:`rotation` + +.. class:: MMCifInfoBioUnit + + This stores information how a structure is to be assembled to form the + biounit. + + .. attribute:: details + + Special asepcts of the biological assembly. If not provided, resembles an + empty string. + + Also available as :meth:`GetDetails`. May also be modified by + :meth:`SetDetails`. + + .. attribute:: chains + + Chains involved in this biounit. If not provided, resembles an empty list. + + Also available as :meth:`GetChainList`. May also be modified by + :meth:`AddChain`. + + .. attribute:: operations + + Translations and rotations needed to create the biounit. + + May be modified by :meth:`AddOperations` + + .. method:: GetDetails() + + See :attr:`details` + + .. method:: SetDetails(details) + + See :attr:`details` + + .. method:: GetChainList() + + See :attr:`chains` + + .. method:: AddChain(chain name) + + See :attr:`chains` + + .. method:: AddOperations(list of operations) + + See :attr:`operations` diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc index 632e11f9f522539ca1773c8c7cd12ee2c75e51bb..6da2ac02afaf8bb56ead9adac83e3360a23a2238 100644 --- a/modules/io/pymod/export_mmcif_io.cc +++ b/modules/io/pymod/export_mmcif_io.cc @@ -16,6 +16,7 @@ // along with this library; if not, write to the Free Software Foundation, Inc., // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA //------------------------------------------------------------------------------ +#include <boost/shared_ptr.hpp> #include <boost/python.hpp> #include <boost/python/suite/indexing/vector_indexing_suite.hpp> using namespace boost::python; @@ -100,21 +101,89 @@ void export_mmcif_io() typedef std::vector<MMCifInfoCitation> MMCifInfoCitationList; class_<std::vector<MMCifInfoCitation> >("MMCifInfoCitationList", init<>()) .def(vector_indexing_suite<std::vector<MMCifInfoCitation> >()) - ; + ; + + + class_<MMCifInfoTransOp, MMCifInfoTransOpPtr>("MMCifInfoTransOp", init<>()) + .def("SetID", &MMCifInfoTransOp::SetID) + .def("GetID", &MMCifInfoTransOp::GetID) + .def("SetType", &MMCifInfoTransOp::SetType) + .def("GetType", &MMCifInfoTransOp::GetType) + .def("SetVector", &MMCifInfoTransOp::SetVector) + .def("GetVector", &MMCifInfoTransOp::GetVector) + .def("SetMatrix", &MMCifInfoTransOp::SetMatrix) + .def("GetMatrix", &MMCifInfoTransOp::GetMatrix) + .add_property("id", &MMCifInfoTransOp::GetID, + &MMCifInfoTransOp::SetID) + .add_property("type", &MMCifInfoTransOp::GetType, + &MMCifInfoTransOp::SetType) + .add_property("translation", &MMCifInfoTransOp::GetVector, + &MMCifInfoTransOp::SetVector) + .add_property("rotation", &MMCifInfoTransOp::GetMatrix, + &MMCifInfoTransOp::SetMatrix) + ; + + typedef std::vector<MMCifInfoTransOp> MMCifInfoTransOpList; + class_<std::vector<MMCifInfoTransOp> >("MMCifInfoTransOpList", init<>()) + .def(vector_indexing_suite<std::vector<MMCifInfoTransOp> >()) + ; + + typedef std::vector<MMCifInfoTransOpPtr> MMCifInfoTransOpPtrList; + class_<std::vector<MMCifInfoTransOpPtr> >("MMCifInfoTransOpPtrList", init<>()) + .def(vector_indexing_suite<std::vector<MMCifInfoTransOpPtr>, true >()) + ; + + typedef std::vector<MMCifInfoTransOpPtrList > MMCifInfoTransOpPtrListList; + class_<std::vector<MMCifInfoTransOpPtrList > >("MMCifInfoTransOpPtrListList", + init<>()) + .def(vector_indexing_suite<std::vector<MMCifInfoTransOpPtrList >, true >()) + ; + + class_<MMCifInfoBioUnit>("MMCifInfoBioUnit", init<>()) + .def("SetDetails", &MMCifInfoBioUnit::SetDetails) + .def("GetDetails", &MMCifInfoBioUnit::GetDetails) + .def("AddChain", &MMCifInfoBioUnit::AddChain) + .def("GetChainList", make_function(&MMCifInfoBioUnit::GetChainList, + return_value_policy<copy_const_reference>())) + .def("AddOperations", &MMCifInfoBioUnit::AddOperations) + .def("GetOperations", make_function(&MMCifInfoBioUnit::GetOperations, + return_value_policy<copy_const_reference>())) + .add_property("details", &MMCifInfoBioUnit::GetDetails, + &MMCifInfoBioUnit::SetDetails) + .add_property("chains", make_function(&MMCifInfoBioUnit::GetChainList, + return_value_policy<copy_const_reference>())) + .add_property("operations", make_function(&MMCifInfoBioUnit::GetOperations, + return_value_policy<copy_const_reference>())) + ; + + typedef std::vector<MMCifInfoBioUnit> MMCifInfoBioUnitList; + class_<std::vector<MMCifInfoBioUnit> >("MMCifInfoBioUnitList", init<>()) + .def(vector_indexing_suite<std::vector<MMCifInfoBioUnit> >()) + ; class_<MMCifInfo>("MMCifInfo", init<>()) .def("AddCitation", &MMCifInfo::AddCitation) .def("GetCitations", make_function(&MMCifInfo::GetCitations, return_value_policy<copy_const_reference>())) + .def("AddBioUnit", &MMCifInfo::AddBioUnit) + .def("GetBioUnits", make_function(&MMCifInfo::GetBioUnits, + return_value_policy<copy_const_reference>())) .def("SetMethod", &MMCifInfo::SetMethod) .def("GetMethod", &MMCifInfo::GetMethod) .def("SetResolution", &MMCifInfo::SetResolution) .def("GetResolution", &MMCifInfo::GetResolution) .def("AddAuthorsToCitation", &MMCifInfo::AddAuthorsToCitation) + .def("AddOperation", &MMCifInfo::AddOperation) + .def("GetOperations", make_function(&MMCifInfo::GetOperations, + return_value_policy<copy_const_reference>())) .add_property("citations", make_function(&MMCifInfo::GetCitations, return_value_policy<copy_const_reference>())) + .add_property("biounits", make_function(&MMCifInfo::GetBioUnits, + return_value_policy<copy_const_reference>())) .add_property("method", &MMCifInfo::GetMethod, &MMCifInfo::SetMethod) .add_property("resolution", &MMCifInfo::GetResolution, &MMCifInfo::SetResolution) + .add_property("operations", make_function(&MMCifInfo::GetOperations, + return_value_policy<copy_const_reference>())) ; } diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh index 5b257bebbdfbd9fec7ac4897074cc3e20a37df9b..b8b0aba50c34d60dff7607874322580c70c4c75a 100644 --- a/modules/io/src/mol/mmcif_info.hh +++ b/modules/io/src/mol/mmcif_info.hh @@ -20,11 +20,156 @@ #define OST_MMCIF_INFO_HH #include <vector> +#include <boost/shared_ptr.hpp> +#include <ost/geom/geom.hh> #include <ost/string_ref.hh> #include <ost/io/module_config.hh> namespace ost { namespace io { +class DLLEXPORT_OST_IO MMCifInfoTransOp { +public: + /// \brief Create an operation + MMCifInfoTransOp(): id_(""), type_("") + { + translation_ = geom::Vec3(); + }; + + /// \brief Set id + /// + /// \param id id + void SetID(String id) { id_ = id; } + /// \brief Get id + /// + /// \return id + String GetID() const { return id_; } + + /// \brief Set type + /// + /// \param type + void SetType(String type) { type_ = type; } + /// \brief Get type + /// + /// \return type + String GetType() const { return type_; } + + /// \brief Set the translational vector + /// + /// \param + void SetVector(Real x, Real y, Real z) + { + translation_.SetX(x); + translation_.SetY(y); + translation_.SetZ(z); + } + /// \brief Get the translational vector + /// + /// \return vector + geom::Vec3 GetVector() const { return translation_; } + + /// \brief Set the rotational matrix + /// + /// \param + void SetMatrix(Real i00, Real i01, Real i02, + Real i10, Real i11, Real i12, + Real i20, Real i21, Real i22) + { + rotation_ = geom::Mat3(i00,i01,i02, i10,i11,i12, i20,i21,i22); + } + /// \brief Get the rotational matrix + /// + /// \return matrix + geom::Mat3 GetMatrix() const { return rotation_; } + + bool operator==(const MMCifInfoTransOp& op) const { + if (this->id_ != op.id_) { + return false; + } + if (this->type_ != op.type_) { + return false; + } + if (this->translation_ != op.translation_) { + return false; + } + if (this->rotation_ != op.rotation_) { + return false; + } + + return true; + } + + bool operator!=(const MMCifInfoTransOp& op) const { + return !this->operator==(op); + } + +private: + String id_; ///< identifier + String type_; ///< type of operation + geom::Vec3 translation_; ///< translational vector + geom::Mat3 rotation_; ///< rotational matrix +}; +typedef boost::shared_ptr<MMCifInfoTransOp> MMCifInfoTransOpPtr; + + +class DLLEXPORT_OST_IO MMCifInfoBioUnit { +public: + /// \brief Create a biounit. + MMCifInfoBioUnit(): details_("") {}; + + /// \brief Set details + /// + /// \param details details + void SetDetails(String details) { details_ = details; } + /// \brief Get details + /// + /// \return details + String GetDetails() const { return details_; } + + /// \brief Add a chain name + /// + /// \param chain chain name + void AddChain(String chain) { chains_.push_back(chain); } + /// \brief Get vector of chain names + /// + /// \return chains + const std::vector<String>& GetChainList() const { return chains_; } + + /// \brief Add a set of operations + /// + /// \param operations vector of operations to be added + void AddOperations(std::vector<MMCifInfoTransOpPtr> operations) + { + operations_.push_back(operations); + } + /// \brief Get the list of operations + /// + /// \return vector of vectors of iterators. + const std::vector<std::vector<MMCifInfoTransOpPtr> >& GetOperations() + { + return operations_; + } + + bool operator==(const MMCifInfoBioUnit& bu) const { + if (this->details_ != bu.details_) { + return false; + } + if (this->chains_ != bu.chains_) { + return false; + } + + return true; + } + + bool operator!=(const MMCifInfoBioUnit& bu) const { + return !this->operator==(bu); + } + +private: + String details_; ///< pdbx_struct_assembly.details + std::vector<String> chains_; ///< chains involved in this assembly + std::vector<std::vector<MMCifInfoTransOpPtr> > operations_; +}; + class DLLEXPORT_OST_IO MMCifInfoCitation { public: /// \brief Create a citation. @@ -218,7 +363,6 @@ private: UNKNOWN } MMCifInfoCType; - //CITATION_ID String id_; ///< internal identifier MMCifInfoCType where_; ///< journal or book? String cas_; ///< CAS identifier @@ -292,13 +436,47 @@ public: /// \return experiment resolution Real GetResolution() const { return resolution_; } + /// \brief Add a biounit + /// + /// \param bu biounit to be added + void AddBioUnit(MMCifInfoBioUnit bu) // unit test + { + biounits_.push_back(bu); + } + + /// \brief Get the list of biounits stored in an info object. + /// + /// \return vector of MMCifInfoBioUnit objects + const std::vector<MMCifInfoBioUnit>& GetBioUnits() const + { + return biounits_; + } + + /// \brief Add a operation + /// + /// \param op operation to be added + void AddOperation(MMCifInfoTransOpPtr op) // unit test + { + transops_.push_back(op); + } + + /// \brief Get the list of operations stored in an info object. + /// + /// \return vector of MMCifInfoTransOp objects + const std::vector<MMCifInfoTransOpPtr>& GetOperations() const + { + return transops_; + } + //protected: private: // members String exptl_method_; Real resolution_; - std::vector<MMCifInfoCitation> citations_; ///< list of citations + std::vector<MMCifInfoCitation> citations_; ///< list of citations + std::vector<MMCifInfoBioUnit> biounits_; ///< list of biounits + std::vector<MMCifInfoTransOpPtr> transops_; }; }} // ns diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 96e6c483918b873c435d61e2f8f849ebebb0e777..814ee0662003e0dc45e26ac6fda1d6cc49116a2f 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -78,6 +78,8 @@ void MMCifParser::ClearState() info_ = MMCifInfo(); entity_desc_map_.clear(); authors_map_.clear(); + bu_origin_map_.clear(); + bu_assemblies_.clear(); } void MMCifParser::SetRestrictChains(const String& restrict_chains) @@ -209,6 +211,40 @@ bool MMCifParser::OnBeginLoop(const StarLoopDesc& header) this->TryStoreIdx(LS_D_RES_HIGH, "ls_d_res_high", header); this->TryStoreIdx(LS_D_RES_LOW, "ls_d_res_low", header); cat_available = true; + } else if (header.GetCategory() == "pdbx_struct_assembly") { + category_ = PDBX_STRUCT_ASSEMBLY; + // mandatory items + this->TryStoreIdx(PSA_ID, "id", header); + // optional + indices_[PSA_DETAILS] = header.GetIndex("details"); + //indices_[METHOD_DETAILS] = header.GetIndex("method_details"); + cat_available = true; + } else if (header.GetCategory() == "pdbx_struct_assembly_gen") { + category_ = PDBX_STRUCT_ASSEMBLY_GEN; + // mandatory items + this->TryStoreIdx(ASSEMBLY_ID, "assembly_id", header); + this->TryStoreIdx(ASYM_ID_LIST, "asym_id_list", header); + this->TryStoreIdx(OPER_EXPRESSION, "oper_expression", header); + cat_available = true; + } else if (header.GetCategory() == "pdbx_struct_oper_list") { + category_ = PDBX_STRUCT_OPER_LIST; + // mandatory items + this->TryStoreIdx(PSOL_ID, "id", header); + this->TryStoreIdx(PSOL_TYPE, "type", header); + // optional items + indices_[VECTOR_1] = header.GetIndex("vector[1]"); + indices_[VECTOR_2] = header.GetIndex("vector[2]"); + indices_[VECTOR_3] = header.GetIndex("vector[3]"); + indices_[MATRIX_1_1] = header.GetIndex("matrix[1][1]"); + indices_[MATRIX_1_2] = header.GetIndex("matrix[1][2]"); + indices_[MATRIX_1_3] = header.GetIndex("matrix[1][3]"); + indices_[MATRIX_2_1] = header.GetIndex("matrix[2][1]"); + indices_[MATRIX_2_2] = header.GetIndex("matrix[2][2]"); + indices_[MATRIX_2_3] = header.GetIndex("matrix[2][3]"); + indices_[MATRIX_3_1] = header.GetIndex("matrix[3][1]"); + indices_[MATRIX_3_2] = header.GetIndex("matrix[3][2]"); + indices_[MATRIX_3_3] = header.GetIndex("matrix[3][3]"); + cat_available = true; } category_counts_[category_]++; return cat_available; @@ -729,6 +765,184 @@ void MMCifParser::ParseRefine(const std::vector<StringRef>& columns) "refine.ls_d_res_high")); } +void MMCifParser::ParsePdbxStructAssembly(const std::vector<StringRef>& columns) +{ + if (indices_[PSA_DETAILS] != -1) { + bu_origin_map_.insert(std::pair<String, + String>(columns[indices_[PSA_ID]].str(), + columns[indices_[PSA_DETAILS]].str())); + } else { + bu_origin_map_.insert(std::pair<String, + String>(columns[indices_[PSA_ID]].str(), + "?")); + } +} + +void MMCifParser::StoreExpression(const char* l, const char* s, + bool& is_range, int lborder, + std::vector<String>& single_block) +{ + std::stringstream ss; + int rborder; + + if (l != s) { + if (is_range) { + is_range = false; + rborder = this->TryGetInt(StringRef(l, s-l), + "pdbx_struct_assembly_gen.oper_expression"); + for (lborder += 1; lborder < rborder; lborder++) { + ss << lborder; + single_block.push_back(ss.str()); + ss.str(""); + } + } + single_block.push_back(String(l, s-l)); + } +} + +void MMCifParser::StoreRange(const char*& l, const char* s, bool& is_range, + int& lborder, std::vector<String>& single_block) +{ + if (is_range) { + throw IOException(this->FormatDiagnostic(STAR_DIAG_WARNING, + "pdbx_struct_assembly_gen.oper_expression is missing a right border for a range expression.", + this->GetCurrentLinenum())); + } + is_range = true; + if (l != s) { + lborder = this->TryGetInt(StringRef(l, s-l), + "pdbx_struct_assembly_gen.oper_expression"); + single_block.push_back(String(l, s-l)); + } + l = s+1; +} + +std::vector<std::vector<String> > MMCifParser::UnPackOperExperession(StringRef expression) +{ + std::vector<std::vector<String> > unpacked; + std::vector<String> single_block; + int lborder; + bool is_range = false; + std::stringstream ss; + const char* s = expression.begin(); + const char* e = expression.end(); + const char* l = expression.begin(); + + if (*s == '(') { + ++s; + ++l; + // multiple blocks + while (s != e) { + if (*s == ',') { + StoreExpression(l, s, is_range, lborder, single_block); + l = s+1; + } else if (*s == '-') { + StoreRange(l, s, is_range, lborder, single_block); + } else if (*s == '(') { + ++l; + } else if (*s == ')') { + StoreExpression(l, s, is_range, lborder, single_block); + l = s+1; + if (single_block.size() > 0) { + unpacked.push_back(single_block); + } + single_block.clear(); + } + ++s; + } + } else { + // single block + while (s != e) { + if (*s == ',') { + StoreExpression(l, s, is_range, lborder, single_block); + l = s+1; + } else if (*s == '-') { + StoreRange(l, s, is_range, lborder, single_block); + } + ++s; + } + StoreExpression(l, s, is_range, lborder, single_block); + + if (is_range) { + throw IOException(this->FormatDiagnostic(STAR_DIAG_WARNING, + "pdbx_struct_assembly_gen.oper_expression is missing a right border for a range expression.", + this->GetCurrentLinenum())); + } + unpacked.push_back(single_block); + } + + return unpacked; +} + +void MMCifParser::ParsePdbxStructAssemblyGen(const std::vector<StringRef>& columns) +{ + MMCifBioUAssembly assembly; + assembly.biounit = MMCifInfoBioUnit(); + + assembly.biounit.SetDetails(columns[indices_[ASSEMBLY_ID]].str()); + + std::vector<StringRef> tmp_chains=columns[indices_[ASYM_ID_LIST]].split(','); + std::vector<StringRef>::const_iterator tc_it; + for (tc_it = tmp_chains.begin(); tc_it != tmp_chains.end(); ++tc_it) { + assembly.biounit.AddChain(tc_it->str()); + } + + assembly.operations = + this->UnPackOperExperession(columns[indices_[OPER_EXPRESSION]]); + + bu_assemblies_.push_back(assembly); +} + +void MMCifParser::ParsePdbxStructOperList(const std::vector<StringRef>& columns) +{ + MMCifInfoTransOpPtr op(new MMCifInfoTransOp); + + op->SetID(columns[indices_[PSOL_ID]].str()); + op->SetType(columns[indices_[PSOL_TYPE]].str()); + + if ((indices_[VECTOR_1] != -1)&& + (indices_[VECTOR_2] != -1)&& + (indices_[VECTOR_3] != -1)) { + op->SetVector(this->TryGetReal(columns[indices_[VECTOR_1]], + "pdbx_struct_oper_list.vector[1]"), + this->TryGetReal(columns[indices_[VECTOR_2]], + "pdbx_struct_oper_list.vector[2]"), + this->TryGetReal(columns[indices_[VECTOR_3]], + "pdbx_struct_oper_list.vector[3]")); + } + + if ((indices_[MATRIX_1_1] != -1)&& + (indices_[MATRIX_1_2] != -1)&& + (indices_[MATRIX_1_3] != -1)&& + (indices_[MATRIX_2_1] != -1)&& + (indices_[MATRIX_2_2] != -1)&& + (indices_[MATRIX_2_3] != -1)&& + (indices_[MATRIX_3_1] != -1)&& + (indices_[MATRIX_3_2] != -1)&& + (indices_[MATRIX_3_3] != -1)) { + op->SetMatrix(this->TryGetReal(columns[indices_[MATRIX_1_1]], + "pdbx_struct_oper_list.matrix[1][1]"), + this->TryGetReal(columns[indices_[MATRIX_1_2]], + "pdbx_struct_oper_list.matrix[1][2]"), + this->TryGetReal(columns[indices_[MATRIX_1_3]], + "pdbx_struct_oper_list.matrix[1][3]"), + this->TryGetReal(columns[indices_[MATRIX_2_1]], + "pdbx_struct_oper_list.matrix[2][1]"), + this->TryGetReal(columns[indices_[MATRIX_2_2]], + "pdbx_struct_oper_list.matrix[2][2]"), + this->TryGetReal(columns[indices_[MATRIX_2_3]], + "pdbx_struct_oper_list.matrix[2][3]"), + this->TryGetReal(columns[indices_[MATRIX_3_1]], + "pdbx_struct_oper_list.matrix[3][1]"), + this->TryGetReal(columns[indices_[MATRIX_3_2]], + "pdbx_struct_oper_list.matrix[3][2]"), + this->TryGetReal(columns[indices_[MATRIX_3_3]], + "pdbx_struct_oper_list.matrix[3][3]")); + } + + info_.AddOperation(op); +} + void MMCifParser::OnDataRow(const StarLoopDesc& header, const std::vector<StringRef>& columns) { @@ -761,6 +975,18 @@ void MMCifParser::OnDataRow(const StarLoopDesc& header, LOG_TRACE("processing refine entry") this->ParseRefine(columns); break; + case PDBX_STRUCT_ASSEMBLY: + LOG_TRACE("processing pdbx_struct_assembly entry") + this->ParsePdbxStructAssembly(columns); + break; + case PDBX_STRUCT_ASSEMBLY_GEN: + LOG_TRACE("processing pdbx_struct_assembly_gen entry") + this->ParsePdbxStructAssemblyGen(columns); + break; + case PDBX_STRUCT_OPER_LIST: + LOG_TRACE("processing pdbx_struct_oper_list entry") + this->ParsePdbxStructOperList(columns); + break; default: throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, "Uncatched category '"+ header.GetCategory() +"' found.", @@ -795,14 +1021,62 @@ void MMCifParser::OnEndData() // process citations (couple with authors // iterate citations MMCifCitationAuthorMap::const_iterator atm_it; - std::vector<String>::const_iterator atv_it; - std::vector<int>::const_iterator pos_it; for (atm_it = authors_map_.begin(); atm_it != authors_map_.end(); ++atm_it) { info_.AddAuthorsToCitation(StringRef(atm_it->first.c_str(), atm_it->first.length()), atm_it->second.second); } + bool found; + MMCifBioUAssemblyVector::iterator bua_it; + std::vector<std::vector<String> >::const_iterator aol_it; + std::vector<String>::const_iterator aob_it; + std::vector<MMCifInfoTransOpPtr> operation_list; + std::map<String, String>::const_iterator buom_it; + std::vector<MMCifInfoTransOpPtr> operations = info_.GetOperations(); + std::vector<MMCifInfoTransOpPtr>::const_iterator buop_it; + for (bua_it = bu_assemblies_.begin(); + bua_it != bu_assemblies_.end(); + ++bua_it) { + // pair with pdbx_struct_assembly entry + buom_it = bu_origin_map_.find(bua_it->biounit.GetDetails()); + if (buom_it == bu_origin_map_.end()) { + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "No pdbx_struct_assembly.id '"+ + bua_it->biounit.GetDetails() + + "' found as requested by pdbx_struct_assembly_gen.")); + } + bua_it->biounit.SetDetails(buom_it->second); + + // pair with pdbx_struct_oper_list + for (aol_it = bua_it->operations.begin(); + aol_it != bua_it->operations.end(); + ++aol_it) { + operation_list.clear(); + for (aob_it = aol_it->begin(); aob_it != aol_it->end(); aob_it++) { + found = false; + for (buop_it = operations.begin(); + buop_it != operations.end(); + ++buop_it) { + if ((*buop_it)->GetID() == *aob_it) { + operation_list.push_back(*buop_it); + found = true; + break; + } + } + if (!found) { + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "No pdbx_struct_oper_list.id '"+ + *aob_it + + "' found as requested by pdbx_struct_assembly_gen.")); + } + } + bua_it->biounit.AddOperations(operation_list); + } + info_.AddBioUnit(bua_it->biounit); + } + bu_assemblies_.clear(); + LOG_INFO("imported " << chain_count_ << " chains, " << residue_count_ << " residues, " diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index e8094ab77bdd588bc3488a25561777dcecd2d09e..6204545f5dd64ada2917990a2c8728ac8976a941 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -21,9 +21,7 @@ #include <map> -//#include <boost/iostreams/filtering_stream.hpp> -//#include <boost/filesystem/fstream.hpp> - +#include <ost/geom/geom.hh> #include <ost/seq/sequence_list.hh> #include <ost/mol/residue_handle.hh> #include <ost/mol/chain_type.hh> @@ -47,6 +45,9 @@ namespace ost { namespace io { /// \li entity /// \li entity_poly /// \li citation +/// \li pdbx_struct_assembly +/// \li pdbx_struct_assembly_gen +/// \li pdbx_struct_oper_list class DLLEXPORT_OST_IO MMCifParser : public StarParser { public: /// \brief create a MMCifParser @@ -236,6 +237,31 @@ protected: /// \param columns data row void ParseRefine(const std::vector<StringRef>& columns); + /// \brief Fetch MMCif pdbx_struct_assembly information + /// + /// \param columns data row + void ParsePdbxStructAssembly(const std::vector<StringRef>& columns); + + /// \brief Fetch MMCif pdbx_struct_assembly_gen information + /// + /// \param columns data row + void ParsePdbxStructAssemblyGen(const std::vector<StringRef>& columns); + + std::vector<std::vector<String> > UnPackOperExperession(StringRef expression); + + void StoreExpression(const char* l, const char* s, + bool& is_range, int lborder, + std::vector<String>& single_block); + + void StoreRange(const char*& l, const char* s, bool& is_range, int& lborder, + std::vector<String>& single_block); + + /// \brief Fetch MMCif pdbx_struct_oper_list information + /// + /// \param columns data row + void ParsePdbxStructOperList(const std::vector<StringRef>& columns); + + private: /// \enum magic numbers of this class typedef enum { @@ -311,11 +337,43 @@ private: /// \enum items of the refine category typedef enum { - REFINE_ENTRY_ID, - LS_D_RES_HIGH, + REFINE_ENTRY_ID, ///< id + LS_D_RES_HIGH, ///< crystal resolution LS_D_RES_LOW } RefineItems; + /// \enum items of the pdbx_struct_assembly category + typedef enum { + PSA_DETAILS, ///< special aspects of the assembly + PSA_ID, ///< unique identifier + METHOD_DETAILS ///< details about assembly computation + } PdbxStructAssemblyItems; + + /// \enum items of the pdbx_struct_assembly_gen category + typedef enum { + ASSEMBLY_ID, ///< link to pdbx_struct_assembly.id + ASYM_ID_LIST, ///< list of chains + OPER_EXPRESSION ///< list of pdbx_struct_oper_list.ids + } PdbxStructAssemblyGenItems; + + /// \enum items of the pdbx_struct_oper_list category + typedef enum { + PSOL_ID, ///< unique identifier + PSOL_TYPE, ///< type of operation + VECTOR_1, ///< vector component + VECTOR_2, ///< vector component + VECTOR_3, ///< vector component + MATRIX_1_1, ///< matrix component + MATRIX_1_2, ///< matrix component + MATRIX_1_3, ///< matrix component + MATRIX_2_1, ///< matrix component + MATRIX_2_2, ///< matrix component + MATRIX_2_3, ///< matrix component + MATRIX_3_1, ///< matrix component + MATRIX_3_2, ///< matrix component + MATRIX_3_3 ///< matrix component + } PdbxStructOperListItems; + /// \enum categories of the mmcif format typedef enum { ATOM_SITE, @@ -325,6 +383,9 @@ private: CITATION_AUTHOR, EXPTL, REFINE, + PDBX_STRUCT_ASSEMBLY, + PDBX_STRUCT_ASSEMBLY_GEN, + PDBX_STRUCT_OPER_LIST, DONT_KNOW } MMCifCategory; @@ -334,9 +395,16 @@ private: String details; ///< description of this entity String seqres; ///< chain of monomers } MMCifEntityDesc; - typedef std::map<String, MMCifEntityDesc> MMCifEntityDescMap; - //typedef std::map<String, std::pair<std::vector<int>, std::vector<String> > > + + /// \struct assembly information + typedef struct { + MMCifInfoBioUnit biounit; + std::vector<std::vector<String> > operations; ///< list of links to + /// MMCifBioUOperation + } MMCifBioUAssembly; + typedef std::vector<MMCifBioUAssembly> MMCifBioUAssemblyVector; + typedef std::map<String, std::pair<std::vector<int>, std::vector<String> > > MMCifCitationAuthorMap; @@ -366,6 +434,8 @@ private: bool read_seqres_; MMCifInfo info_; ///< info container MMCifCitationAuthorMap authors_map_; + MMCifBioUAssemblyVector bu_assemblies_; + std::map<String, String> bu_origin_map_; ///< pdbx_struct_assembly.details }; }} diff --git a/modules/io/tests/test_io_mmcif.py b/modules/io/tests/test_io_mmcif.py index ba37343ebd9cdce201455f06fee820f7a3f9c1bc..d6d669aa96b5cb5e0f2ab784685b40f8a25bc36d 100644 --- a/modules/io/tests/test_io_mmcif.py +++ b/modules/io/tests/test_io_mmcif.py @@ -1,11 +1,11 @@ import unittest from ost import * -class TestPDB(unittest.TestCase): +class TestMMCifInfo(unittest.TestCase): def setUp(self): pass - def test_mmcifinfo(self): + def test_mmcifinfo_citation(self): c = io.MMCifInfoCitation() # test ID setting/ getting c.SetID('ID') @@ -63,6 +63,46 @@ class TestPDB(unittest.TestCase): self.assertEquals(i.GetMethod(), 'Deep-Fry') self.assertEquals(i.GetResolution(), 2.0) + + def test_mmcifinfo_biounit(self): + b = io.MMCifInfoBioUnit() + b.SetDetails('Details') + self.assertEquals(b.GetDetails(), 'Details') + b.AddChain('A') + cl = b.GetChainList() + self.assertEquals(cl[0], 'A') + + i = io.MMCifInfo() + i.AddBioUnit(b) + + bl = i.GetBioUnits() + self.assertEquals(len(bl), 1) + + + def test_mmcifinfo_transoperation(self): + o = io.MMCifInfoTransOp() + o.SetID("1") + self.assertEquals(o.GetID(), '1') + o.SetType("identity operation") + self.assertEquals(o.GetType(), 'identity operation') + o.SetVector(1.0, 2.0, 3.0) + self.assertEquals(o.GetVector().x, 1.0) + self.assertEquals(o.GetVector().y, 2.0) + self.assertEquals(o.GetVector().z, 3.0) + o.SetMatrix(1, 2, 3, 4, 5, 6, 7, 8, 9) + self.assertEquals(geom.Equal(o.GetMatrix(), + geom.Mat3(1, 2, 3, 4, 5, 6, 7, 8, 9)), True) + + i = io.MMCifInfo() + i.AddOperation(o) + ol = i.GetOperations() + self.assertEquals(ol[0].GetID(), '1') + + b = io.MMCifInfoBioUnit() + b.AddOperations(ol) + oll = b.GetOperations() + self.assertEquals(oll[0][0].GetID(), '1') + if __name__== '__main__': unittest.main() diff --git a/modules/io/tests/test_mmcif_info.cc b/modules/io/tests/test_mmcif_info.cc index 72a33dbbec09a19f509ed8d7a5b0c959b9396852..238b71cdc95c7264ecf4c4642da746ec7e75c49b 100644 --- a/modules/io/tests/test_mmcif_info.cc +++ b/modules/io/tests/test_mmcif_info.cc @@ -79,6 +79,56 @@ BOOST_AUTO_TEST_CASE(mmcif_info_citation) BOOST_MESSAGE(" done."); } +BOOST_AUTO_TEST_CASE(mmcif_info_biounit) +{ + BOOST_MESSAGE(" Running mmcif_info_biounit tests..."); + + MMCifInfoBioUnit bu = MMCifInfoBioUnit(); + + bu.SetDetails("author_defined_assembly"); + bu.AddChain("A"); + + BOOST_CHECK(bu.GetDetails() == "author_defined_assembly"); + BOOST_CHECK(bu.GetChainList().back() == "A"); + + MMCifInfo info = MMCifInfo(); + info.AddBioUnit(bu); + std::vector<MMCifInfoBioUnit> biounits = info.GetBioUnits(); + BOOST_CHECK(biounits.size() == 1); + BOOST_CHECK(biounits.back() == bu); + + BOOST_MESSAGE(" done."); +} + +BOOST_AUTO_TEST_CASE(mmcif_info_transoperation) +{ + BOOST_MESSAGE(" Running mmcif_info_transoperation tests..."); + + MMCifInfoTransOpPtr op(new MMCifInfoTransOp); + + op->SetID("1"); + op->SetType("identity operation"); + op->SetVector(1, 2, 3); + op->SetMatrix(1, 2, 3, 4, 5, 6, 7 ,8, 9); + + BOOST_CHECK(op->GetID() == "1"); + BOOST_CHECK(op->GetVector() == geom::Vec3(1, 2, 3)); + BOOST_CHECK(op->GetMatrix() == geom::Mat3(1, 2, 3, 4, 5, 6, 7, 8, 9)); + BOOST_CHECK(op->GetType() == "identity operation"); + + MMCifInfo info = MMCifInfo(); + info.AddOperation(op); + BOOST_CHECK(info.GetOperations().back() == op); + + std::vector<MMCifInfoTransOpPtr> ops; + ops.push_back(*(info.GetOperations().begin())); + MMCifInfoBioUnit bu = MMCifInfoBioUnit(); + bu.AddOperations(ops); + BOOST_CHECK((*(bu.GetOperations().begin()->begin())) == op); + + BOOST_MESSAGE(" done."); +} + BOOST_AUTO_TEST_CASE(mmcif_info) { BOOST_MESSAGE(" Running mmcif_info tests..."); diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc index a424553fc5c0facaa4b974f2558757245374db9a..6ae040448b1d63f0ac4d91a15a2965cde816a420 100644 --- a/modules/io/tests/test_mmcif_reader.cc +++ b/modules/io/tests/test_mmcif_reader.cc @@ -48,6 +48,7 @@ public: { } using MMCifParser::OnBeginLoop; + using MMCifParser::OnEndData; using MMCifParser::IsValidPDBIdent; using MMCifParser::ParseAtomIdent; using MMCifParser::ParseAndAddAtom; @@ -55,6 +56,9 @@ public: using MMCifParser::ParseEntityPoly; using MMCifParser::ParseCitation; using MMCifParser::ParseRefine; + using MMCifParser::ParsePdbxStructAssemblyGen; + using MMCifParser::ParsePdbxStructAssembly; + using MMCifParser::ParsePdbxStructOperList; using MMCifParser::TryStoreIdx; using MMCifParser::SetReadSeqRes; using MMCifParser::SetReadCanonicalSeqRes; @@ -604,6 +608,127 @@ BOOST_AUTO_TEST_CASE(mmcif_refine_tests) BOOST_MESSAGE(" done."); } +BOOST_AUTO_TEST_CASE(mmcif_biounit_tests) +{ + BOOST_MESSAGE(" Running mmcif_biounit_tests..."); + //build dummy biounit + mol::EntityHandle eh = mol::CreateEntity(); + TestMMCifParserProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh); + StarLoopDesc tmmcif_h; + std::vector<StringRef> columns; + + tmmcif_h.SetCategory(StringRef("pdbx_struct_assembly_gen", 24)); + tmmcif_h.Add(StringRef("assembly_id", 11)); + tmmcif_h.Add(StringRef("asym_id_list", 12)); + tmmcif_h.Add(StringRef("oper_expression", 15)); + tmmcif_p.OnBeginLoop(tmmcif_h); + + columns.push_back(StringRef("1", 1)); + columns.push_back(StringRef("A", 1)); + columns.push_back(StringRef("1", 1)); + + BOOST_CHECK_NO_THROW(tmmcif_p.ParsePdbxStructAssemblyGen(columns)); + BOOST_CHECK_THROW(tmmcif_p.OnEndData(), IOException); + + tmmcif_h.Clear(); + tmmcif_h.SetCategory(StringRef("pdbx_struct_assembly", 20)); + tmmcif_h.Add(StringRef("id", 2)); + columns.pop_back(); + columns.pop_back(); + columns.pop_back(); + columns.push_back(StringRef("1", 1)); + tmmcif_p.OnBeginLoop(tmmcif_h); + tmmcif_p.ParsePdbxStructAssembly(columns); + + tmmcif_h.Clear(); + tmmcif_h.SetCategory(StringRef("pdbx_struct_assembly_gen", 24)); + tmmcif_h.Add(StringRef("assembly_id", 11)); + tmmcif_h.Add(StringRef("asym_id_list", 12)); + tmmcif_h.Add(StringRef("oper_expression", 15)); + tmmcif_p.OnBeginLoop(tmmcif_h); + + columns.pop_back(); + columns.push_back(StringRef("1", 1)); + columns.push_back(StringRef("A", 1)); + columns.push_back(StringRef("1", 1)); + + BOOST_CHECK_NO_THROW(tmmcif_p.ParsePdbxStructAssemblyGen(columns)); + BOOST_CHECK_THROW(tmmcif_p.OnEndData(), IOException); + + tmmcif_h.Clear(); + tmmcif_h.SetCategory(StringRef("pdbx_struct_assembly_gen", 24)); + tmmcif_h.Add(StringRef("assembly_id", 11)); + tmmcif_h.Add(StringRef("asym_id_list", 12)); + tmmcif_h.Add(StringRef("oper_expression", 15)); + tmmcif_p.OnBeginLoop(tmmcif_h); + + columns.pop_back(); + columns.push_back(StringRef("1-Z", 3)); + BOOST_CHECK_THROW(tmmcif_p.ParsePdbxStructAssemblyGen(columns), IOException); + + columns.pop_back(); + columns.push_back(StringRef("--", 3)); + BOOST_CHECK_THROW(tmmcif_p.ParsePdbxStructAssemblyGen(columns), IOException); + + columns.pop_back(); + columns.push_back(StringRef("A-3", 3)); + BOOST_CHECK_THROW(tmmcif_p.ParsePdbxStructAssemblyGen(columns), IOException); + + tmmcif_h.Clear(); + tmmcif_h.SetCategory(StringRef("pdbx_struct_oper_list", 21)); + tmmcif_h.Add(StringRef("id", 2)); + tmmcif_h.Add(StringRef("type", 4)); + tmmcif_h.Add(StringRef("vector[1]", 9)); + tmmcif_h.Add(StringRef("vector[2]", 9)); + tmmcif_h.Add(StringRef("vector[3]", 9)); + tmmcif_h.Add(StringRef("matrix[1][1]", 12)); + tmmcif_h.Add(StringRef("matrix[1][2]", 12)); + tmmcif_h.Add(StringRef("matrix[1][3]", 12)); + tmmcif_h.Add(StringRef("matrix[2][1]", 12)); + tmmcif_h.Add(StringRef("matrix[2][2]", 12)); + tmmcif_h.Add(StringRef("matrix[2][3]", 12)); + tmmcif_h.Add(StringRef("matrix[3][1]", 12)); + tmmcif_h.Add(StringRef("matrix[3][2]", 12)); + tmmcif_h.Add(StringRef("matrix[3][3]", 12)); + + + + tmmcif_p.OnBeginLoop(tmmcif_h); + + columns.pop_back(); + columns.pop_back(); + columns.pop_back(); + columns.push_back(StringRef("1", 1)); + columns.push_back(StringRef("Nan", 3)); + columns.push_back(StringRef("1", 1)); + columns.push_back(StringRef("A", 1)); + columns.push_back(StringRef("3", 1)); + BOOST_CHECK_THROW(tmmcif_p.ParsePdbxStructOperList(columns), IOException); + + columns.pop_back(); + columns.pop_back(); + columns.pop_back(); + columns.pop_back(); + columns.pop_back(); + columns.push_back(StringRef("1", 1)); + columns.push_back(StringRef("Nan", 3)); + columns.push_back(StringRef("1", 1)); + columns.push_back(StringRef("2", 1)); + columns.push_back(StringRef("3", 1)); + columns.push_back(StringRef("1", 1)); + columns.push_back(StringRef("2", 1)); + columns.push_back(StringRef("3", 1)); + columns.push_back(StringRef("1", 1)); + columns.push_back(StringRef("2", 1)); + columns.push_back(StringRef("3", 1)); + columns.push_back(StringRef("1", 1)); + columns.push_back(StringRef("A", 1)); + columns.push_back(StringRef("3", 1)); + BOOST_CHECK_THROW(tmmcif_p.ParsePdbxStructOperList(columns), IOException); + + BOOST_MESSAGE(" done."); +} + BOOST_AUTO_TEST_CASE(mmcif_parseatomident) { BOOST_MESSAGE(" Running mmcif_parseatomident tests..."); @@ -703,6 +828,13 @@ BOOST_AUTO_TEST_CASE(mmcif_testreader) BOOST_MESSAGE(" reading data fields which should not fail..."); BOOST_CHECK(mmcif_p.GetInfo().GetMethod().str() == "Deep-fry"); + BOOST_CHECK(mmcif_p.GetInfo().GetBioUnits().back().GetDetails() == + "author_defined_assembly"); + BOOST_CHECK(mmcif_p.GetInfo().GetBioUnits().back().GetChainList().back() == + "F"); + MMCifInfoBioUnit bu = mmcif_p.GetInfo().GetBioUnits().back(); + BOOST_CHECK(bu.GetOperations().back().back()->GetType() == + "identity operation"); BOOST_MESSAGE(" done."); BOOST_MESSAGE(" done."); diff --git a/modules/io/tests/testfiles/mmcif/atom_site.mmcif b/modules/io/tests/testfiles/mmcif/atom_site.mmcif index f6a1b63d4a3095a861a9f85fbd31bf6193578a8e..b10009871d934a8ece41ad4e2c1fcc1a07ed661e 100644 --- a/modules/io/tests/testfiles/mmcif/atom_site.mmcif +++ b/modules/io/tests/testfiles/mmcif/atom_site.mmcif @@ -50,6 +50,41 @@ _refine.entry_id '1BAR' _refine.ls_d_res_high 2.0 _refine.ls_d_res_low 1.5 +# biounit begin +loop_ +_pdbx_struct_assembly.id +_pdbx_struct_assembly.details +_pdbx_struct_assembly.method_details +_pdbx_struct_assembly.oligomeric_details +_pdbx_struct_assembly.oligomeric_count +1 author_defined_assembly ? monomeric 1 +2 author_defined_assembly ? monomeric 1 + +loop_ +_pdbx_struct_assembly_gen.assembly_id +_pdbx_struct_assembly_gen.oper_expression +_pdbx_struct_assembly_gen.asym_id_list +1 1 A,C,E +2 1 B,D,F + +_pdbx_struct_oper_list.id 1 +_pdbx_struct_oper_list.type 'identity operation' +_pdbx_struct_oper_list.name 1_555 +_pdbx_struct_oper_list.symmetry_operation x,y,z +_pdbx_struct_oper_list.matrix[1][1] 1.0000000000 +_pdbx_struct_oper_list.matrix[1][2] 0.0000000000 +_pdbx_struct_oper_list.matrix[1][3] 0.0000000000 +_pdbx_struct_oper_list.vector[1] 1.1000000000 +_pdbx_struct_oper_list.matrix[2][1] 0.0000000000 +_pdbx_struct_oper_list.matrix[2][2] 1.0000000000 +_pdbx_struct_oper_list.matrix[2][3] 0.0000000000 +_pdbx_struct_oper_list.vector[2] 0.5000000000 +_pdbx_struct_oper_list.matrix[3][1] 0.0000000000 +_pdbx_struct_oper_list.matrix[3][2] 0.0000000000 +_pdbx_struct_oper_list.matrix[3][3] 1.0000000000 +_pdbx_struct_oper_list.vector[3] 23.3000000000 +# biounit end + loop_ _atom_site.group_PDB _atom_site.type_symbol