diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index 837614d183facfc65111efacf1874d77ef788b00..df9a948bd44cb5b17c9e3174d97c6cc64fb83e01 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -130,7 +130,14 @@ of the annotation available. .. method:: AddBioUnit(biounit) - Add a bio unit to the bio unit list of an info object. + Add a bio unit to the bio unit list of an info object. If the + :attr:`id <MMCifInfoBioUnit.id>` of ``biounit`` already exists in the set + of assemblies, both will be merged. This means that + :attr:`chains <MMCifInfoBioUnit.chains>` and + :attr:`operations <MMCifInfoBioUnit.operations>` lists will be concatenated + and the interval lists + (:attr:`operationsintervalls <MMCifInfoBioUnit.operationsintervalls>`, + :attr:`chainintervalls <MMCifInfoBioUnit.chainintervalls>`) will be updated. :param biounit: Bio unit to be added. :type biounit: :class:`MMCifInfoBioUnit` @@ -501,6 +508,16 @@ of the annotation available. Also available as :meth:`GetChainList`. May also be modified by :meth:`AddChain` or :meth:`SetChainList`. + .. attribute:: chainintervalls + + List of intervals on the chain list. Needed if there are several sets of + chains and transformations to create the bio unit. Comes as a list of + tuples. First component is the start, second is the right border of the + interval. + + Also available as :meth:`GetChainIntervalList`. Is automatically modified by + :meth:`AddChain`, :meth:`SetChainList` and :meth:`MMCifInfo.AddBioUnit`. + .. attribute:: operations Translations and rotations needed to create the bio unit. Filled with @@ -509,6 +526,16 @@ of the annotation available. Also available as :meth:`GetOperations`. May be modified by :meth:`AddOperations` + .. attribute:: operationsintervalls + + List of intervals on the operations list. Needed if there are several sets of + chains and transformations to create the bio unit. Comes as a list of + tuples. 
First component is the start, second is the right border of the + interval. + + Also available as :meth:`GetOperationsIntervalList`. Is automatically + modified by :meth:`AddOperations` and :meth:`MMCifInfo.AddBioUnit`. + .. method:: GetID() See :attr:`id` @@ -531,14 +558,20 @@ of the annotation available. .. method:: SetChainList(chains) + See :attr:`chains`, also resets :attr:`chainintervalls` to contain only one + interval enclosing the whole chain list. + :param chains: List of chain names. :type chains: :class:`~ost.StringList` - See :attr:`chains` - .. method:: AddChain(chain name) - See :attr:`chains` + See :attr:`chains`, also extends the right border of the last entry in + :attr:`chainintervalls`. + + .. method:: GetChainIntervalList() + + See :attr:`chainintervalls` .. method:: GetOperations() @@ -546,7 +579,12 @@ of the annotation available. .. method:: AddOperations(list of operations) - See :attr:`operations` + See :attr:`operations`, also extends the right border of the last entry in + :attr:`operationsintervalls`. + + .. method:: GetOperationsIntervalList() + + See :attr:`operationsintervalls` .. function:: PDBize(asu, seqres=None, min_polymer_size=10, transformation=False) @@ -957,4 +995,6 @@ of the annotation available. .. LocalWords: GetNum num GetStatus GetLastDate GetFirstRelease storable .. LocalWords: SetChainList MMCifInfoTransOp ChainTypes MMCifInfoStructRef .. LocalWords: MMCifInfoRevisions bool difs MMCifInfoStructRefSeqDif rnum -.. LocalWords: SetDateOriginal GetDateOriginal yyyy +.. LocalWords: SetDateOriginal GetDateOriginal yyyy operationsintervalls +.. LocalWords: chainintervalls GetChainIntervalList +.. 
LocalWords: GetOperationsIntervalList
diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc
index 5f226e9fef82c3409d94e31ced923b4456dc4393..9a21bdb1af003bdaa33406e7f7e98754f8a606ea 100644
--- a/modules/io/pymod/export_mmcif_io.cc
+++ b/modules/io/pymod/export_mmcif_io.cc
@@ -21,6 +21,7 @@
 #include <boost/python/suite/indexing/vector_indexing_suite.hpp>
 using namespace boost::python;
 
+#include <ost/export_helper/pair_to_tuple_conv.hh>
 #include <ost/io/mol/io_profile.hh>
 #include <ost/io/mol/mmcif_reader.hh>
 #include <ost/io/mol/mmcif_info.hh>
@@ -172,6 +173,14 @@ void export_mmcif_io()
     .add_property("seq_rnum", &MMCifInfoStructRefSeqDif::GetSeqRNum)
     .add_property("db_rnum", &MMCifInfoStructRefSeqDif::GetDBRNum)
   ;
+
+  typedef std::pair<int, int> IntPair;
+  to_python_converter<IntPair, PairToTupleConverter<int, int> >();
+  typedef std::vector<IntPair> VectorIntPair;
+  class_<VectorIntPair>("VectorIntPair", init<>())
+    .def(vector_indexing_suite<VectorIntPair, true>())
+  ;
+
   class_<MMCifInfoBioUnit>("MMCifInfoBioUnit", init<>())
     .def("SetDetails", &MMCifInfoBioUnit::SetDetails)
     .def("GetDetails", &MMCifInfoBioUnit::GetDetails)
@@ -179,17 +188,29 @@ void export_mmcif_io()
     .def("SetChainList", &MMCifInfoBioUnit::SetChainList)
     .def("GetChainList", make_function(&MMCifInfoBioUnit::GetChainList,
                          return_value_policy<copy_const_reference>()))
+    .def("GetChainIntervalList",
+         make_function(&MMCifInfoBioUnit::GetChainIntervalList,
+                       return_value_policy<copy_const_reference>()))
     .def("AddOperations", &MMCifInfoBioUnit::AddOperations)
     .def("GetOperations", make_function(&MMCifInfoBioUnit::GetOperations,
                          return_value_policy<copy_const_reference>()))
+    .def("GetOperationsIntervalList",
+         make_function(&MMCifInfoBioUnit::GetOperationsIntervalList,
+                       return_value_policy<copy_const_reference>()))
     .def("SetID", &MMCifInfoBioUnit::SetID)
     .def("GetID", &MMCifInfoBioUnit::GetID)
     .add_property("details", &MMCifInfoBioUnit::GetDetails,
                   &MMCifInfoBioUnit::SetDetails)
     .add_property("chains", make_function(&MMCifInfoBioUnit::GetChainList,
                                    return_value_policy<copy_const_reference>()))
+    .add_property("chainintervalls", make_function(
+                                   &MMCifInfoBioUnit::GetChainIntervalList,
+                                   return_value_policy<copy_const_reference>()))
     .add_property("operations", make_function(&MMCifInfoBioUnit::GetOperations,
                                    return_value_policy<copy_const_reference>()))
+    .add_property("operationsintervalls", make_function(
+                                   &MMCifInfoBioUnit::GetOperationsIntervalList,
+                                   return_value_policy<copy_const_reference>()))
     .add_property("id", &MMCifInfoBioUnit::GetID, &MMCifInfoBioUnit::SetID)
   ;
 
diff --git a/modules/io/src/mol/mmcif_info.cc b/modules/io/src/mol/mmcif_info.cc
index 29f0db7463188068b3584a49457126d7ec1601d0..e7f391894cab4e8ace515de79fa14f6e047fd683 100644
--- a/modules/io/src/mol/mmcif_info.cc
+++ b/modules/io/src/mol/mmcif_info.cc
@@ -72,7 +72,84 @@ void MMCifInfo::AddAuthorsToCitation(StringRef id, std::vector<String> list)
   throw IOException("No citation for identifier '" + id.str() + "' found.");
 }
 
+// Store a biounit. A biounit whose id is already known is merged into the
+// stored one (see MMCifInfoBioUnit::Merge) instead of being appended.
+void MMCifInfo::AddBioUnit(MMCifInfoBioUnit bu)
+{
+  for (std::vector<MMCifInfoBioUnit>::iterator bu_it = biounits_.begin();
+       bu_it != biounits_.end(); ++bu_it) {
+    if (bu_it->GetID() == bu.GetID()) {
+      bu_it->Merge(bu);
+      return;
+    }
+  }
+  biounits_.push_back(bu);
+}
 
+// Append a chain name. The last chain interval is extended to end behind the
+// new chain; the first chain ever added opens the interval (0, 1).
+void MMCifInfoBioUnit::AddChain(String chain)
+{
+  chains_.push_back(chain);
+
+  if (tr_chains_.empty()) {
+    tr_chains_.push_back(std::make_pair(0, 1));
+  }
+  else {
+    tr_chains_.back().second = static_cast<int>(chains_.size());
+  }
+}
+
+// Replace the chain list. The interval list is reset to a single interval
+// running from 0 to the number of chains.
+void MMCifInfoBioUnit::SetChainList(std::vector<String> chains)
+{
+  chains_ = chains;
+
+  tr_chains_.clear();
+  tr_chains_.push_back(std::make_pair(0, static_cast<int>(chains_.size())));
+}
+
+// Append a set of operations. The last operations interval is extended to end
+// behind the new set; the first set ever added opens the interval (0, 1).
+void MMCifInfoBioUnit::AddOperations(std::vector<MMCifInfoTransOpPtr> operations)
+{
+  operations_.push_back(operations);
+
+  if (tr_operations_.empty()) {
+    tr_operations_.push_back(std::make_pair(0, 1));
+  }
+  else {
+    tr_operations_.back().second = static_cast<int>(operations_.size());
+  }
+}
+
+// Append the chains and operations of 'from' to this biounit. Every call
+// adds one interval to each interval list, covering the appended range
+// (which is empty if 'from' holds no chains or no operations).
+void MMCifInfoBioUnit::Merge(const MMCifInfoBioUnit& from)
+{
+  if (&from == this) {
+    // inserting a vector's own range into itself is undefined behaviour,
+    // so a self-merge works on a snapshot
+    const MMCifInfoBioUnit snapshot(from);
+    Merge(snapshot);
+    return;
+  }
+  // merge chains
+  const int old_chains = static_cast<int>(chains_.size());
+  chains_.insert(chains_.end(), from.chains_.begin(), from.chains_.end());
+  tr_chains_.push_back(
+      std::make_pair(old_chains, static_cast<int>(chains_.size())));
+  // merge operations
+  const int old_ops = static_cast<int>(operations_.size());
+  operations_.insert(operations_.end(),
+                     from.operations_.begin(),
+                     from.operations_.end());
+  tr_operations_.push_back(
+      std::make_pair(old_ops, static_cast<int>(operations_.size())));
+}
+
 MMCifInfoStructRefSeqPtr
 MMCifInfoStructRef::AddAlignedSeq(const String& aid,
                                   const String& chain_name,
@@ -108,5 +185,4 @@ MMCifInfoStructRefSeq::AddDif(int seq_rnum, int db_rnum, const String& details)
   return d;
 }
 
-
 }} //ns
diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh
index 8eef288ef571a1ae71297214d958722a84110a1b..52a44aedc1baf2dcea4255f4a4980eeaec03570f 100644
--- a/modules/io/src/mol/mmcif_info.hh
+++ b/modules/io/src/mol/mmcif_info.hh
@@ -241,6 +241,11 @@ public:
   /// \brief Create a biounit.
   MMCifInfoBioUnit(): id_(""), details_("") {};
 
+  /// \brief Append chains & operations of another biounit, add intervals
+  ///
+  /// \param from biounit to read data from, left unchanged
+  void Merge(const MMCifInfoBioUnit& from);
+
   /// \brief Set id
   ///
   /// \param id id
@@ -262,25 +267,39 @@ public:
   /// \brief Add a chain name
   ///
   /// \param chain chain name
-  void AddChain(String chain) { chains_.push_back(chain); }
+  void AddChain(String chain);
 
   /// \brief Set a vector of chain names
   ///
-  /// \param chain chain name
-  void SetChainList(std::vector<String> chains) { chains_ = chains; }
+  /// \param chains vector of chain names
+  void SetChainList(std::vector<String> chains);
 
   /// \brief Get vector of chain names
   ///
   /// \return chains
   const std::vector<String>& GetChainList() const { return chains_; }
 
+  /// \brief Get the list of intervals of chains
+  ///
+  /// \return (start, end) index pairs into the chain list
+  const std::vector<std::pair<int, int> >& GetChainIntervalList() const
+  {
+    return tr_chains_;
+  }
+
   /// \brief Add a set of operations
   ///
   /// \param operations vector of operations to be added
-  void AddOperations(std::vector<MMCifInfoTransOpPtr> operations)
+  void AddOperations(std::vector<MMCifInfoTransOpPtr> operations);
+
+  /// \brief Get the list of intervals of operations
+  ///
+  /// \return (start, end) index pairs into the operations list
+  const std::vector<std::pair<int, int> >& GetOperationsIntervalList() const
   {
-    operations_.push_back(operations);
+    return tr_operations_;
   }
+
   /// \brief Get the list of operations
   ///
   /// \return vector of vectors of iterators.
@@ -299,6 +318,12 @@ public:
     if (this->chains_ != bu.chains_) {
       return false;
     }
+    if (this->tr_chains_ != bu.tr_chains_) {
+      return false;
+    }
+    if (this->tr_operations_ != bu.tr_operations_) {
+      return false;
+    }
     if (this->operations_.size() == bu.operations_.size()) {
       std::vector<std::vector<MMCifInfoTransOpPtr> >::const_iterator th_ops_it;
       std::vector<std::vector<MMCifInfoTransOpPtr> >::const_iterator bu_ops_it;
@@ -335,8 +360,10 @@ public:
 private:
   String id_;                  ///< pdbx_struct_assembly.id
   String details_;             ///< pdbx_struct_assembly.details
-  std::vector<String> chains_; ///< chains involved in this assembly
+  std::vector<String> chains_; ///< all chains of this assembly
+  std::vector<std::pair<int, int> > tr_chains_; ///< chains of a transformation
   std::vector<std::vector<MMCifInfoTransOpPtr> > operations_;
+  std::vector<std::pair<int, int> > tr_operations_; ///< ops of a transformation
 };
 
 class DLLEXPORT_OST_IO MMCifInfoCitation {
@@ -894,10 +921,7 @@ public:
   /// \brief Add a biounit
   ///
   /// \param bu biounit to be added
-  void AddBioUnit(MMCifInfoBioUnit bu) // unit test
-  {
-    biounits_.push_back(bu);
-  }
+  void AddBioUnit(MMCifInfoBioUnit bu);
 
   /// \brief Get the list of biounits stored in an info object.
/// diff --git a/modules/io/tests/test_io_mmcif.py b/modules/io/tests/test_io_mmcif.py index f32150f69193ebf3301e06d94d990175b51e2841..e51376f25f238d03b1d230306f28fef069a88fd5 100644 --- a/modules/io/tests/test_io_mmcif.py +++ b/modules/io/tests/test_io_mmcif.py @@ -69,21 +69,38 @@ class TestMMCifInfo(unittest.TestCase): b.SetDetails('Details') self.assertEquals(b.GetDetails(), 'Details') b.AddChain('A') + b.AddChain('B') cl = b.GetChainList() + il = b.GetChainIntervalList() self.assertEquals(cl[0], 'A') + self.assertEquals(il[0][0], 0) + self.assertEquals(il[0][1], 2) s = ost.StringList() s.append('B') s.append('C') + s.append('D') b.SetChainList(s) cl = b.GetChainList() + il = b.GetChainIntervalList() + self.assertEquals(il[0][0], 0) + self.assertEquals(il[0][1], 3) self.assertEquals(cl[0], 'B') self.assertEquals(cl[1], 'C') + self.assertEquals(cl[2], 'D') i = io.MMCifInfo() i.AddBioUnit(b) + i.AddBioUnit(b) + b.SetID("2") + i.AddBioUnit(b) bl = i.GetBioUnits() - self.assertEquals(len(bl), 1) + il = bl[0].GetChainIntervalList() + self.assertEquals(il[0][0], 0) + self.assertEquals(il[0][1], 3) + self.assertEquals(il[1][0], 3) + self.assertEquals(il[1][1], 6) + self.assertEquals(len(bl), 2) def test_mmcifinfo_transoperation(self): @@ -109,6 +126,10 @@ class TestMMCifInfo(unittest.TestCase): b.AddOperations(ol) oll = b.GetOperations() self.assertEquals(oll[0][0].GetID(), '1') + tr_ol = b.GetOperationsIntervalList() + self.assertEquals(len(tr_ol), 1) + self.assertEquals(tr_ol[0][0], 0) + self.assertEquals(tr_ol[0][1], 1) def test_mmcifinfo_biounit_pdbize(self): ent, seqres, info = io.LoadMMCIF("testfiles/mmcif/3T6C.cif.gz", @@ -179,7 +200,7 @@ class TestMMCifInfo(unittest.TestCase): pdb_ent, t = info.GetBioUnits()[0].PDBize(ent, transformation=True) self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[0], -915.8, 1) self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[1], -952.345, 2) - self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[2], 3221.75, 2) + 
self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[2], 3221.74, 2) self.assertEquals(geom.Equal(t, geom.Mat4(1,0,0,-920.462, 0,1,0,-966.654, diff --git a/modules/io/tests/test_mmcif_info.cc b/modules/io/tests/test_mmcif_info.cc index 1719e914075554b3c6b42d7893704236516aca9d..71dc2ddf40d7433c60629497288cf6e989528a5d 100644 --- a/modules/io/tests/test_mmcif_info.cc +++ b/modules/io/tests/test_mmcif_info.cc @@ -109,15 +109,23 @@ BOOST_AUTO_TEST_CASE(mmcif_info_biounit) MMCifInfoBioUnit bu = MMCifInfoBioUnit(); bu.SetDetails("author_defined_assembly"); + bu.SetID("1"); + bu.AddChain("B"); bu.AddChain("A"); BOOST_CHECK(bu.GetDetails() == "author_defined_assembly"); BOOST_CHECK(bu.GetChainList().back() == "A"); + std::vector<std::pair<int, int> > tr = bu.GetChainIntervalList(); + BOOST_CHECK(tr[0].first == 0); + BOOST_CHECK(tr[0].second == 2); std::vector<String> chains; chains.push_back("B"); bu.SetChainList(chains); BOOST_CHECK(bu.GetChainList().back() == "B"); + tr = bu.GetChainIntervalList(); + BOOST_CHECK(tr[0].first == 0); + BOOST_CHECK(tr[0].second == 1); MMCifInfo info = MMCifInfo(); info.AddBioUnit(bu); @@ -125,6 +133,18 @@ BOOST_AUTO_TEST_CASE(mmcif_info_biounit) BOOST_CHECK(biounits.size() == 1); BOOST_CHECK(biounits.back() == bu); + info.AddBioUnit(bu); + bu.SetID("2"); + info.AddBioUnit(bu); + biounits = info.GetBioUnits(); + BOOST_CHECK(biounits.size() == 2); + tr = biounits.front().GetChainIntervalList(); + BOOST_CHECK(tr.size() == 2); + BOOST_CHECK(tr[0].first == 0); + BOOST_CHECK(tr[0].second == 1); + BOOST_CHECK(tr[1].first == 1); + BOOST_CHECK(tr[1].second == 2); + BOOST_MESSAGE(" done."); } @@ -153,6 +173,10 @@ BOOST_AUTO_TEST_CASE(mmcif_info_transoperation) MMCifInfoBioUnit bu = MMCifInfoBioUnit(); bu.AddOperations(ops); BOOST_CHECK((*(bu.GetOperations().begin()->begin())) == op); + std::vector<std::pair<int, int> > tr = bu.GetOperationsIntervalList(); + BOOST_CHECK(tr.size() == 1); + BOOST_CHECK(tr.back().first == 0); + BOOST_CHECK(tr.back().second 
== 1); BOOST_MESSAGE(" done."); }