diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index a887323546f8b9687ec9c6e0bde909903261d2d7..df9a948bd44cb5b17c9e3174d97c6cc64fb83e01 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -130,7 +130,14 @@ of the annotation available. .. method:: AddBioUnit(biounit) - Add a bio unit to the bio unit list of an info object. + Add a bio unit to the bio unit list of an info object. If the + :attr:`id <MMCifInfoBioUnit.id>` of ``biounit`` already exists in the set + of assemblies, both will be merged. This means that + :attr:`chain <MMCifInfoBioUnit.chains>` and + :attr:`operations <MMCifInfoBioUnit.operations>` lists will be concatenated + and the interval lists + (:attr:`operationsintervalls <MMCifInfoBioUnit.operationsintervalls>`, + :attr:`chainintervalls <MMCifInfoBioUnit.chainintervalls>`) will be updated. :param biounit: Bio unit to be added. :type biounit: :class:`MMCifInfoBioUnit` @@ -499,7 +506,17 @@ of the annotation available. Chains involved in this bio unit. If not provided, resembles an empty list. Also available as :meth:`GetChainList`. May also be modified by - :meth:`AddChain`. + :meth:`AddChain` or :meth:`SetChainList`. + + .. attribute:: chainintervalls + + List of intervals on the chain list. Needed if there are several sets of + chains and transformations to create the bio unit. Comes as a list of + tuples. First component is the start, second is the right border of the + interval. + + Also available as :meth:`GetChainIntervalList`. Is automatically modified by + :meth:`AddChain`, :meth:`SetChainList` and :meth:`MMCifInfo.AddBioUnit`. .. attribute:: operations @@ -509,6 +526,16 @@ of the annotation available. Also available as :meth:`GetOperations`. May be modified by :meth:`AddOperations` + .. attribute:: operationsintervalls + + List of intervals on the operations list. Needed if there are several sets of + chains and transformations to create the bio unit. Comes as a list of + tuples. 
First component is the start, second is the right border of the + interval. + + Also available as :meth:`GetOperationsIntervalList`. Is automatically + modified by :meth:`AddOperations` and :meth:`MMCifInfo.AddBioUnit`. + .. method:: GetID() See :attr:`id` @@ -529,9 +556,22 @@ of the annotation available. See :attr:`chains` + .. method:: SetChainList(chains) + + See :attr:`chains`, also resets :attr:`chainintervalls` to contain only one + interval enclosing the whole chain list. + + :param chains: List of chain names. + :type chains: :class:`~ost.StringList` + .. method:: AddChain(chain name) - See :attr:`chains` + See :attr:`chains`, also extends the right border of the last entry in + :attr:`chainintervalls`. + + .. method:: GetChainIntervalList() + + See :attr:`chainintervalls` .. method:: GetOperations() @@ -539,7 +579,12 @@ of the annotation available. .. method:: AddOperations(list of operations) - See :attr:`operations` + See :attr:`operations`, also extends the right border of the last entry in + :attr:`operationsintervalls`. + + .. method:: GetOperationsIntervalList() + + See :attr:`operationsintervalls` .. function:: PDBize(asu, seqres=None, min_polymer_size=10, transformation=False) @@ -567,7 +612,7 @@ of the annotation available. :param asu: Asymmetric unit to work on. Should be created from a mmCIF file. - :type asu: :class:`~ost.mol.EntityHandle>` + :type asu: :class:`~ost.mol.EntityHandle` :param seqres: If set to a valid sequence list, the length of the seqres records will be used to determine if a certain chain has the minimally required length. @@ -948,3 +993,8 @@ of the annotation available. .. LocalWords: auth GetMMCifPDBChainTr AddPDBCMMCifhainTr GetPDBMMCifChainTr .. LocalWords: GetRevisions AddRevision SetRevisionsDateOriginal GetSize .. LocalWords: GetNum num GetStatus GetLastDate GetFirstRelease storable +.. LocalWords: SetChainList MMCifInfoTransOp ChainTypes MMCifInfoStructRef +.. 
LocalWords: MMCifInfoRevisions bool difs MMCifInfoStructRefSeqDif rnum +.. LocalWords: SetDateOriginal GetDateOriginal yyyy operationsintervalls +.. LocalWords: chainintervalls GetChainIntervalList +.. LocalWords: GetOperationsIntervalList diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index e9e597d726f5d921fb6938f12a13a221610b16b8..2bfe867309289f3daa5bb504ac1c18235c17b009 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -380,105 +380,110 @@ def _PDBize(biounit, asu, seqres=None, min_polymer_size=10, return atom_pos_wrong chain_names='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz' + cur_chain_name = 0 + water_chain = mol.ChainHandle() + ligand_chain = mol.ChainHandle() + a_pos_wrong = False + pdb_bu = mol.CreateEntity() + edi = pdb_bu.EditXCS(mol.BUFFERED_EDIT) + chains = biounit.GetChainList() + c_intvls = biounit.GetChainIntervalList() + o_intvls = biounit.GetOperationsIntervalList() # create list of operations # for cartesian products, operations are stored in a list, multiplied with # the next list of operations and re-stored... until all lists of operations # are multiplied in an all-against-all manner. 
operations = biounit.GetOperations() - trans_matrices = list() - if len(operations) > 0: - for op in operations[0]: - rot = geom.Mat4() - rot.PasteRotation(op.rotation) - trans = geom.Mat4() - trans.PasteTranslation(op.translation) - tr = geom.Mat4() - tr = trans * rot - trans_matrices.append(tr) - for op_n in range(1, len(operations)): - tmp_ops = list() - for o in operations[op_n]: + for i in range(0,len(c_intvls)): + trans_matrices = list() + l_operations = operations[o_intvls[i][0]:o_intvls[i][1]] + if len(l_operations) > 0: + for op in l_operations[0]: rot = geom.Mat4() - rot.PasteRotation(o.rotation) + rot.PasteRotation(op.rotation) trans = geom.Mat4() - trans.PasteTranslation(o.translation) + trans.PasteTranslation(op.translation) tr = geom.Mat4() tr = trans * rot - for t_o in trans_matrices: - tp = t_o * tr - tmp_ops.append(tp) - trans_matrices = tmp_ops - # select chains into a view as basis for each transformation - assu = asu.Select('cname=' + ','.join(biounit.GetChainList())) - # use each transformation on the view, store as entity and transform, PDBize - # the result while adding everything to one large entity - pdb_bu = mol.CreateEntity() - edi = pdb_bu.EditXCS(mol.BUFFERED_EDIT) - cur_chain_name = 0 - water_chain = mol.ChainHandle() - ligand_chain = mol.ChainHandle() - a_pos_wrong = False - for tr in trans_matrices: - # do a PDBize, add each new entity to the end product - for chain in assu.chains: - residue_count = len(chain.residues) - if seqres: - seqres_chain = seqres.FindSequence(chain.name) - if seqres_chain.IsValid(): - residue_count = len(seqres_chain) - if chain.is_polymer and residue_count >= min_polymer_size: - if len(chain_names) == cur_chain_name: - raise RuntimeError('Running out of chain names') - new_chain = edi.InsertChain(chain_names[cur_chain_name]) - cur_chain_name += 1 - edi.SetChainDescription(new_chain, chain.description) - edi.SetChainType(new_chain, chain.type) - new_chain.SetStringProp('original_name', chain.name) - if 
chain.HasProp("pdb_auth_chain_name"): - new_chain.SetStringProp("pdb_auth_chain_name", - chain.GetStringProp("pdb_auth_chain_name")) - for res in chain.residues: - new_res = edi.AppendResidue(new_chain, res.name, res.number) - a_b = _CopyAtoms(res, new_res, edi, tr) - if not a_pos_wrong: - a_pos_wrong = a_b - elif chain.type == mol.CHAINTYPE_WATER: - if not water_chain.IsValid(): - # water gets '-' as name - water_chain = edi.InsertChain('-') - edi.SetChainDescription(water_chain, chain.description) - edi.SetChainType(water_chain, chain.type) - for res in chain.residues: - new_res = edi.AppendResidue(water_chain, res.name) - new_res.SetStringProp('type', mol.StringFromChainType(chain.type)) - new_res.SetStringProp('description', chain.description) - a_b = _CopyAtoms(res, new_res, edi, tr) - if not a_pos_wrong: - a_pos_wrong = a_b - else: - if not ligand_chain.IsValid(): - # all ligands, put in one chain, are named '_' - ligand_chain = edi.InsertChain('_') - last_rnum = 0 - else: - last_rnum = ligand_chain.residues[-1].number.num - residues=chain.residues - ins_code='\0' - if len(residues)>1: - ins_code='A' - for res in chain.residues: - new_res = edi.AppendResidue(ligand_chain, res.name, - mol.ResNum(last_rnum+1, ins_code)) - new_res.SetStringProp('description', chain.description) - new_res.SetStringProp('type', mol.StringFromChainType(chain.type)) - new_res.SetStringProp("original_name", chain.name) + trans_matrices.append(tr) + for op_n in range(1, len(l_operations)): + tmp_ops = list() + for o in l_operations[op_n]: + rot = geom.Mat4() + rot.PasteRotation(o.rotation) + trans = geom.Mat4() + trans.PasteTranslation(o.translation) + tr = geom.Mat4() + tr = trans * rot + for t_o in trans_matrices: + tp = t_o * tr + tmp_ops.append(tp) + trans_matrices = tmp_ops + # select chains into a view as basis for each transformation + assu = asu.Select('cname='+','.join(chains[c_intvls[i][0]:c_intvls[i][1]])) + # use each transformation on the view, store as entity and 
transform, PDBize + # the result while adding everything to one large entity + for tr in trans_matrices: + # do a PDBize, add each new entity to the end product + for chain in assu.chains: + residue_count = len(chain.residues) + if seqres: + seqres_chain = seqres.FindSequence(chain.name) + if seqres_chain.IsValid(): + residue_count = len(seqres_chain) + if chain.is_polymer and residue_count >= min_polymer_size: + if len(chain_names) == cur_chain_name: + raise RuntimeError('Running out of chain names') + new_chain = edi.InsertChain(chain_names[cur_chain_name]) + cur_chain_name += 1 + edi.SetChainDescription(new_chain, chain.description) + edi.SetChainType(new_chain, chain.type) + new_chain.SetStringProp('original_name', chain.name) if chain.HasProp("pdb_auth_chain_name"): - new_res.SetStringProp("pdb_auth_chain_name", - chain.GetStringProp("pdb_auth_chain_name")) - ins_code = chr(ord(ins_code)+1) - a_b = _CopyAtoms(res, new_res, edi, tr) - if not a_pos_wrong: - a_pos_wrong = a_b + new_chain.SetStringProp("pdb_auth_chain_name", + chain.GetStringProp("pdb_auth_chain_name")) + for res in chain.residues: + new_res = edi.AppendResidue(new_chain, res.name, res.number) + a_b = _CopyAtoms(res, new_res, edi, tr) + if not a_pos_wrong: + a_pos_wrong = a_b + elif chain.type == mol.CHAINTYPE_WATER: + if not water_chain.IsValid(): + # water gets '-' as name + water_chain = edi.InsertChain('-') + edi.SetChainDescription(water_chain, chain.description) + edi.SetChainType(water_chain, chain.type) + for res in chain.residues: + new_res = edi.AppendResidue(water_chain, res.name) + new_res.SetStringProp('type', mol.StringFromChainType(chain.type)) + new_res.SetStringProp('description', chain.description) + a_b = _CopyAtoms(res, new_res, edi, tr) + if not a_pos_wrong: + a_pos_wrong = a_b + else: + if not ligand_chain.IsValid(): + # all ligands, put in one chain, are named '_' + ligand_chain = edi.InsertChain('_') + last_rnum = 0 + else: + last_rnum = ligand_chain.residues[-1].number.num 
+ residues=chain.residues + ins_code='\0' + if len(residues)>1: + ins_code='A' + for res in chain.residues: + new_res = edi.AppendResidue(ligand_chain, res.name, + mol.ResNum(last_rnum+1, ins_code)) + new_res.SetStringProp('description', chain.description) + new_res.SetStringProp('type', mol.StringFromChainType(chain.type)) + new_res.SetStringProp("original_name", chain.name) + if chain.HasProp("pdb_auth_chain_name"): + new_res.SetStringProp("pdb_auth_chain_name", + chain.GetStringProp("pdb_auth_chain_name")) + ins_code = chr(ord(ins_code)+1) + a_b = _CopyAtoms(res, new_res, edi, tr) + if not a_pos_wrong: + a_pos_wrong = a_b move_to_origin = None if a_pos_wrong: start = pdb_bu.bounds.min diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc index 78cd7e124f08460009e4f5b98432080e8422ae95..9a21bdb1af003bdaa33406e7f7e98754f8a606ea 100644 --- a/modules/io/pymod/export_mmcif_io.cc +++ b/modules/io/pymod/export_mmcif_io.cc @@ -21,6 +21,7 @@ #include <boost/python/suite/indexing/vector_indexing_suite.hpp> using namespace boost::python; +#include <ost/export_helper/pair_to_tuple_conv.hh> #include <ost/io/mol/io_profile.hh> #include <ost/io/mol/mmcif_reader.hh> #include <ost/io/mol/mmcif_info.hh> @@ -172,23 +173,44 @@ void export_mmcif_io() .add_property("seq_rnum", &MMCifInfoStructRefSeqDif::GetSeqRNum) .add_property("db_rnum", &MMCifInfoStructRefSeqDif::GetDBRNum) ; + + typedef std::pair<int, int> IntPair; + to_python_converter<IntPair, PairToTupleConverter<int, int> >(); + typedef std::vector<IntPair> VectorIntPair; + class_<VectorIntPair>("VectorIntPair", init<>()) + .def(vector_indexing_suite<VectorIntPair, true>()) + ; + class_<MMCifInfoBioUnit>("MMCifInfoBioUnit", init<>()) .def("SetDetails", &MMCifInfoBioUnit::SetDetails) .def("GetDetails", &MMCifInfoBioUnit::GetDetails) .def("AddChain", &MMCifInfoBioUnit::AddChain) + .def("SetChainList", &MMCifInfoBioUnit::SetChainList) .def("GetChainList", 
make_function(&MMCifInfoBioUnit::GetChainList, return_value_policy<copy_const_reference>())) + .def("GetChainIntervalList", + make_function(&MMCifInfoBioUnit::GetChainIntervalList, + return_value_policy<copy_const_reference>())) .def("AddOperations", &MMCifInfoBioUnit::AddOperations) .def("GetOperations", make_function(&MMCifInfoBioUnit::GetOperations, return_value_policy<copy_const_reference>())) + .def("GetOperationsIntervalList", + make_function(&MMCifInfoBioUnit::GetOperationsIntervalList, + return_value_policy<copy_const_reference>())) .def("SetID", &MMCifInfoBioUnit::SetID) .def("GetID", &MMCifInfoBioUnit::GetID) .add_property("details", &MMCifInfoBioUnit::GetDetails, &MMCifInfoBioUnit::SetDetails) .add_property("chains", make_function(&MMCifInfoBioUnit::GetChainList, return_value_policy<copy_const_reference>())) + .add_property("chainintervalls", make_function( + &MMCifInfoBioUnit::GetChainIntervalList, + return_value_policy<copy_const_reference>())) .add_property("operations", make_function(&MMCifInfoBioUnit::GetOperations, return_value_policy<copy_const_reference>())) + .add_property("operationsintervalls", make_function( + &MMCifInfoBioUnit::GetOperationsIntervalList, + return_value_policy<copy_const_reference>())) .add_property("id", &MMCifInfoBioUnit::GetID, &MMCifInfoBioUnit::SetID) ; diff --git a/modules/io/src/mol/mmcif_info.cc b/modules/io/src/mol/mmcif_info.cc index 29f0db7463188068b3584a49457126d7ec1601d0..e7f391894cab4e8ace515de79fa14f6e047fd683 100644 --- a/modules/io/src/mol/mmcif_info.cc +++ b/modules/io/src/mol/mmcif_info.cc @@ -72,7 +72,75 @@ void MMCifInfo::AddAuthorsToCitation(StringRef id, std::vector<String> list) throw IOException("No citation for identifier '" + id.str() + "' found."); } +void MMCifInfo::AddBioUnit(MMCifInfoBioUnit bu) +{ + std::vector<MMCifInfoBioUnit>::iterator bu_it; + for (bu_it = biounits_.begin(); bu_it != biounits_.end(); ++bu_it) { + if (bu_it->GetID() == bu.GetID()) { + break; + } + } + if (bu_it != 
biounits_.end()) { + bu_it->Merge(bu); + return; + } + biounits_.push_back(bu); +} +void MMCifInfoBioUnit::AddChain(String chain) +{ + chains_.push_back(chain); + + if (tr_chains_.size()) { + tr_chains_.back().second = chains_.size(); + } + else { + std::pair<int, int> tr_interval = std::pair<int, int>(0, 1); + tr_chains_.push_back(tr_interval); + } +} + +void MMCifInfoBioUnit::SetChainList(std::vector<String> chains) +{ + chains_ = chains; + + if (tr_chains_.size()) { + tr_chains_.clear(); + } + std::pair<int, int> tr_interval = std::pair<int, int>(0, chains_.size()); + tr_chains_.push_back(tr_interval); +} + +void MMCifInfoBioUnit::AddOperations(std::vector<MMCifInfoTransOpPtr> operations) +{ + operations_.push_back(operations); + + if (tr_operations_.size()) { + tr_operations_.back().second = operations_.size(); + } + else { + std::pair<int, int> tr_interval = std::pair<int, int>(0, 1); + tr_operations_.push_back(tr_interval); + } +} + +void MMCifInfoBioUnit::Merge(MMCifInfoBioUnit& from) +{ + // merge chains + int old_size = chains_.size(); + chains_.insert(chains_.end(), from.chains_.begin(), from.chains_.end()); + std::pair<int, int> tr_interval = std::pair<int, int>(old_size, + chains_.size()); + tr_chains_.push_back(tr_interval); + // merge operations + old_size = operations_.size(); + operations_.insert(operations_.end(), + from.operations_.begin(), + from.operations_.end()); + tr_interval.first = old_size; + tr_interval.second = operations_.size(); + tr_operations_.push_back(tr_interval); +} MMCifInfoStructRefSeqPtr MMCifInfoStructRef::AddAlignedSeq(const String& aid, const String& chain_name, @@ -108,5 +176,4 @@ MMCifInfoStructRefSeq::AddDif(int seq_rnum, int db_rnum, const String& details) return d; } - }} //ns diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh index f567af0520381f6b726be63046235e2dd0b9a87a..52a44aedc1baf2dcea4255f4a4980eeaec03570f 100644 --- a/modules/io/src/mol/mmcif_info.hh +++ 
b/modules/io/src/mol/mmcif_info.hh @@ -241,6 +241,11 @@ public: /// \brief Create a biounit. MMCifInfoBioUnit(): id_(""), details_("") {}; + /// \brief Merge chains & operations, set intervals + /// + /// \param from biounit to read data from + void Merge(MMCifInfoBioUnit& from); + /// \brief Set id /// /// \param id id @@ -262,19 +267,39 @@ public: /// \brief Add a chain name /// /// \param chain chain name - void AddChain(String chain) { chains_.push_back(chain); } + void AddChain(String chain); + + /// \brief Set a vector of chain names + /// + /// \param chains chain name + void SetChainList(std::vector<String> chains); + /// \brief Get vector of chain names /// /// \return chains const std::vector<String>& GetChainList() const { return chains_; } + /// \brief Get the list of intervals of chains + /// + /// \return pair-intervals + const std::vector<std::pair<int, int> >& GetChainIntervalList() + { + return tr_chains_; + } + /// \brief Add a set of operations /// /// \param operations vector of operations to be added - void AddOperations(std::vector<MMCifInfoTransOpPtr> operations) + void AddOperations(std::vector<MMCifInfoTransOpPtr> operations); + + /// \brief Get the list of intervals of operations + /// + /// \return pair-intervals + const std::vector<std::pair<int, int> >& GetOperationsIntervalList() { - operations_.push_back(operations); + return tr_operations_; } + /// \brief Get the list of operations /// /// \return vector of vectors of iterators. 
@@ -293,6 +318,12 @@ public: if (this->chains_ != bu.chains_) { return false; } + if (this->tr_chains_ != bu.tr_chains_) { + return false; + } + if (this->tr_operations_ != bu.tr_operations_) { + return false; + } if (this->operations_.size() == bu.operations_.size()) { std::vector<std::vector<MMCifInfoTransOpPtr> >::const_iterator th_ops_it; std::vector<std::vector<MMCifInfoTransOpPtr> >::const_iterator bu_ops_it; @@ -329,8 +360,10 @@ public: private: String id_; ///< pdbx_struct_assembly.id String details_; ///< pdbx_struct_assembly.details - std::vector<String> chains_; ///< chains involved in this assembly + std::vector<String> chains_; ///< all chains of this assembly + std::vector<std::pair<int, int> > tr_chains_; ///< chains of a transformation std::vector<std::vector<MMCifInfoTransOpPtr> > operations_; + std::vector<std::pair<int, int> > tr_operations_; ///< ops. of a transformation }; class DLLEXPORT_OST_IO MMCifInfoCitation { @@ -888,10 +921,7 @@ public: /// \brief Add a biounit /// /// \param bu biounit to be added - void AddBioUnit(MMCifInfoBioUnit bu) // unit test - { - biounits_.push_back(bu); - } + void AddBioUnit(MMCifInfoBioUnit bu); /// \brief Get the list of biounits stored in an info object. 
/// diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 33e71d4e3daeced0ddf26484cdfb4c0dfe9bbccf..91d46cb9b86646add1832a8ea972b6c316a3cba7 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -981,14 +981,13 @@ std::vector<std::vector<String> > MMCifReader::UnPackOperExperession(StringRef e void MMCifReader::ParsePdbxStructAssemblyGen(const std::vector<StringRef>& columns) { MMCifBioUAssembly assembly; - assembly.biounit = MMCifInfoBioUnit(); - assembly.biounit.SetID(columns[indices_[ASSEMBLY_ID]].str()); + assembly.biounit_id = columns[indices_[ASSEMBLY_ID]].str(); std::vector<StringRef> tmp_chains=columns[indices_[ASYM_ID_LIST]].split(','); std::vector<StringRef>::const_iterator tc_it; for (tc_it = tmp_chains.begin(); tc_it != tmp_chains.end(); ++tc_it) { - assembly.biounit.AddChain(tc_it->str()); + assembly.chains.push_back(tc_it->str()); } assembly.operations = @@ -1620,19 +1619,22 @@ void MMCifReader::OnEndData() std::vector<MMCifInfoTransOpPtr> operations = info_.GetOperations(); info_.SetStructRefs(struct_refs_); std::vector<MMCifInfoTransOpPtr>::const_iterator buop_it; + MMCifInfoBioUnit biounit; for (bua_it = bu_assemblies_.begin(); bua_it != bu_assemblies_.end(); ++bua_it) { + biounit = MMCifInfoBioUnit(); // pair with pdbx_struct_assembly entry - buom_it = bu_origin_map_.find(bua_it->biounit.GetID()); + buom_it = bu_origin_map_.find(bua_it->biounit_id); if (buom_it == bu_origin_map_.end()) { throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, "No pdbx_struct_assembly.id '"+ - bua_it->biounit.GetID() + + bua_it->biounit_id + "' found as requested by pdbx_struct_assembly_gen.")); } - bua_it->biounit.SetDetails(buom_it->second); - bua_it->biounit.SetID(buom_it->first); + biounit.SetDetails(buom_it->second); + biounit.SetID(buom_it->first); + biounit.SetChainList(bua_it->chains); // pair with pdbx_struct_oper_list for (aol_it = bua_it->operations.begin(); @@ -1657,9 +1659,9 
@@ void MMCifReader::OnEndData() "' found as requested by pdbx_struct_assembly_gen.")); } } - bua_it->biounit.AddOperations(operation_list); + biounit.AddOperations(operation_list); } - info_.AddBioUnit(bua_it->biounit); + info_.AddBioUnit(biounit); } bu_assemblies_.clear(); diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index b8f6daed068fd5c96293d4d8e6442d24a74b3e0b..d91cf340355be48f58664a4ada5e02966365e931 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -559,9 +559,11 @@ private: /// \struct assembly information typedef struct { - MMCifInfoBioUnit biounit; + String biounit_id; ///< identifier for the bu + std::vector<String> chains; ///< chains affected by + /// this operations std::vector<std::vector<String> > operations; ///< list of links to - /// MMCifBioUOperation + /// MMCifBioUOperation } MMCifBioUAssembly; typedef std::vector<MMCifBioUAssembly> MMCifBioUAssemblyVector; diff --git a/modules/io/tests/test_io_mmcif.py b/modules/io/tests/test_io_mmcif.py index 9b5481d6d821ccc54c133961d4128fa2654ed915..be7594314f103c127f2c6297066daf45f24e7d21 100644 --- a/modules/io/tests/test_io_mmcif.py +++ b/modules/io/tests/test_io_mmcif.py @@ -69,14 +69,38 @@ class TestMMCifInfo(unittest.TestCase): b.SetDetails('Details') self.assertEquals(b.GetDetails(), 'Details') b.AddChain('A') + b.AddChain('B') cl = b.GetChainList() + il = b.GetChainIntervalList() self.assertEquals(cl[0], 'A') + self.assertEquals(il[0][0], 0) + self.assertEquals(il[0][1], 2) + s = ost.StringList() + s.append('B') + s.append('C') + s.append('D') + b.SetChainList(s) + cl = b.GetChainList() + il = b.GetChainIntervalList() + self.assertEquals(il[0][0], 0) + self.assertEquals(il[0][1], 3) + self.assertEquals(cl[0], 'B') + self.assertEquals(cl[1], 'C') + self.assertEquals(cl[2], 'D') i = io.MMCifInfo() i.AddBioUnit(b) + i.AddBioUnit(b) + b.SetID("2") + i.AddBioUnit(b) bl = i.GetBioUnits() - self.assertEquals(len(bl), 1) + il 
= bl[0].GetChainIntervalList() + self.assertEquals(il[0][0], 0) + self.assertEquals(il[0][1], 3) + self.assertEquals(il[1][0], 3) + self.assertEquals(il[1][1], 6) + self.assertEquals(len(bl), 2) def test_mmcifinfo_transoperation(self): @@ -102,6 +126,10 @@ class TestMMCifInfo(unittest.TestCase): b.AddOperations(ol) oll = b.GetOperations() self.assertEquals(oll[0][0].GetID(), '1') + tr_ol = b.GetOperationsIntervalList() + self.assertEquals(len(tr_ol), 1) + self.assertEquals(tr_ol[0][0], 0) + self.assertEquals(tr_ol[0][1], 1) def test_mmcifinfo_biounit_pdbize(self): ent, seqres, info = io.LoadMMCIF("testfiles/mmcif/3T6C.cif.gz", diff --git a/modules/io/tests/test_mmcif_info.cc b/modules/io/tests/test_mmcif_info.cc index 3f3e21baf4b8a9e3f10b8f65380338668015bfd3..71dc2ddf40d7433c60629497288cf6e989528a5d 100644 --- a/modules/io/tests/test_mmcif_info.cc +++ b/modules/io/tests/test_mmcif_info.cc @@ -109,10 +109,23 @@ BOOST_AUTO_TEST_CASE(mmcif_info_biounit) MMCifInfoBioUnit bu = MMCifInfoBioUnit(); bu.SetDetails("author_defined_assembly"); + bu.SetID("1"); + bu.AddChain("B"); bu.AddChain("A"); BOOST_CHECK(bu.GetDetails() == "author_defined_assembly"); BOOST_CHECK(bu.GetChainList().back() == "A"); + std::vector<std::pair<int, int> > tr = bu.GetChainIntervalList(); + BOOST_CHECK(tr[0].first == 0); + BOOST_CHECK(tr[0].second == 2); + + std::vector<String> chains; + chains.push_back("B"); + bu.SetChainList(chains); + BOOST_CHECK(bu.GetChainList().back() == "B"); + tr = bu.GetChainIntervalList(); + BOOST_CHECK(tr[0].first == 0); + BOOST_CHECK(tr[0].second == 1); MMCifInfo info = MMCifInfo(); info.AddBioUnit(bu); @@ -120,6 +133,18 @@ BOOST_AUTO_TEST_CASE(mmcif_info_biounit) BOOST_CHECK(biounits.size() == 1); BOOST_CHECK(biounits.back() == bu); + info.AddBioUnit(bu); + bu.SetID("2"); + info.AddBioUnit(bu); + biounits = info.GetBioUnits(); + BOOST_CHECK(biounits.size() == 2); + tr = biounits.front().GetChainIntervalList(); + BOOST_CHECK(tr.size() == 2); + BOOST_CHECK(tr[0].first 
== 0); + BOOST_CHECK(tr[0].second == 1); + BOOST_CHECK(tr[1].first == 1); + BOOST_CHECK(tr[1].second == 2); + BOOST_MESSAGE(" done."); } @@ -148,6 +173,10 @@ BOOST_AUTO_TEST_CASE(mmcif_info_transoperation) MMCifInfoBioUnit bu = MMCifInfoBioUnit(); bu.AddOperations(ops); BOOST_CHECK((*(bu.GetOperations().begin()->begin())) == op); + std::vector<std::pair<int, int> > tr = bu.GetOperationsIntervalList(); + BOOST_CHECK(tr.size() == 1); + BOOST_CHECK(tr.back().first == 0); + BOOST_CHECK(tr.back().second == 1); BOOST_MESSAGE(" done."); }