diff --git a/modules/io/pymod/export_omf_io.cc b/modules/io/pymod/export_omf_io.cc index 974211d4542b97c6ecd92942fd37fa911873eafa..bb3e8169c1e61cff430c82fb1c42562702119568 100644 --- a/modules/io/pymod/export_omf_io.cc +++ b/modules/io/pymod/export_omf_io.cc @@ -66,14 +66,14 @@ void export_omf_io() { class_<OMF, OMFPtr>("OMF",no_init) .def("FromEntity", &OMF::FromEntity, (arg("ent"), arg("options")=0)).staticmethod("FromEntity") - .def("FromMMCIF", &OMF::FromMMCIF, (arg("ent"), arg("mmcif_info"), arg("options")=0)).staticmethod("FromMMCIF") .def("FromFile", &OMF::FromFile).staticmethod("FromFile") .def("FromBytes", &wrap_from_bytes).staticmethod("FromBytes") .def("ToFile", &OMF::ToFile) .def("ToBytes", &wrap_to_bytes) .def("GetAU", &OMF::GetAU) + .def("GetEntity", &OMF::GetEntity) .def("GetAUChain", &OMF::GetAUChain) - .def("GetBU", &OMF::GetBU) + .def("GetEntityChain", &OMF::GetEntityChain) .def("GetName", &OMF::GetName) .def("GetChainNames", &wrap_get_chain_names) .def("GetPositions", &OMF::GetPositions, return_value_policy<reference_existing_object>(),(arg("cname"))) diff --git a/modules/io/src/mol/omf.cc b/modules/io/src/mol/omf.cc index 35ee9ce2d57671de321c96569b6264f6bb79fc5c..310857d4d82ad2904e530f7fbd1a12e20a4fd92b 100644 --- a/modules/io/src/mol/omf.cc +++ b/modules/io/src/mol/omf.cc @@ -877,52 +877,6 @@ namespace{ } } - // generates as many chain names as you want (potentially multiple characters) - struct ChainNameGenerator{ - ChainNameGenerator() { - chain_names = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz"; - n_chain_names = chain_names.size(); - indices.push_back(-1); - } - - String Get() { - int idx = indices.size() - 1; - indices[idx] += 1; - bool more_digits = false; - while(idx >= 0) { - if(indices[idx] >= n_chain_names) { - indices[idx] = 0; - if(idx>0) { - indices[idx-1] += 1; - --idx; - } else { - more_digits = true; - break; - } - } else { - break; - } - } - if(more_digits) { - indices.insert(indices.begin(), 0); - } - String ch_name(indices.size(), 'X'); - for(uint i = 0; i < indices.size(); ++i) { - ch_name[i] = chain_names[indices[i]]; - } - return ch_name; - } - - void Reset() { - indices.clear(); - indices.push_back(-1); - } - - String chain_names; - int n_chain_names; - std::vector<int> indices; - }; - // delta/runlength encodings/decodings void DeltaEncoding(const std::vector<int>& in, std::vector<int>& out) { out.clear(); @@ -1299,17 +1253,6 @@ namespace{ } } - // dump and load vectors with BioUnitDefinition - void Load(std::istream& stream, - std::vector<ost::io::BioUnitDefinition>& vec) { - uint32_t size; - stream.read(reinterpret_cast<char*>(&size), sizeof(uint32_t)); - vec.resize(size); - for(uint i = 0; i < size; ++i) { - vec[i].FromStream(stream); - } - } - void Dump(std::ostream& stream, const std::vector<bool>& vec) { uint32_t size = vec.size(); @@ -1337,34 +1280,6 @@ namespace{ } } - void Dump(std::ostream& stream, - const std::vector<ost::io::BioUnitDefinition>& vec) { - uint32_t size = vec.size(); - stream.write(reinterpret_cast<char*>(&size), sizeof(uint32_t)); - for(uint i = 0; i < size; ++i) { - vec[i].ToStream(stream); - } - } - - // dump and load vectors with Mat4 - void Load(std::istream& stream, std::vector<geom::Mat4>& vec) { - uint32_t size; - stream.read(reinterpret_cast<char*>(&size), sizeof(uint32_t)); - vec.resize(size); - for(uint i = 0; i < size; ++i) { - stream.read(reinterpret_cast<char*>(vec[i].Data()),16*sizeof(Real)); - } - } - - void Dump(std::ostream& stream, const std::vector<geom::Mat4>& vec) { - uint32_t size = vec.size(); - stream.write(reinterpret_cast<char*>(&size), sizeof(uint32_t)); - for(uint i = 0; i < size; ++i) { - stream.write(reinterpret_cast<const char*>(vec[i].Data()), - 16*sizeof(Real)); - } - } - // knowledge based dump and load functions, i.e. awesome compressions void LoadIsHetatm(std::istream& stream, std::vector<bool>& vec) { std::vector<int> run_length_encoded; @@ -1845,61 +1760,6 @@ void ResidueDefinition::_AddAtomRule(int a_idx, int anch_one_idx, sidechain_atom_rules.push_back(rule); } -BioUnitDefinition::BioUnitDefinition(const ost::io::MMCifInfoBioUnit& bu) { - - au_chains = bu.GetChainList(); - - const std::vector<std::pair<int, int> >& bu_ch_intvl = - bu.GetChainIntervalList(); - for(auto it = bu_ch_intvl.begin(); it != bu_ch_intvl.end(); ++it) { - chain_intvl.push_back(it->first); - chain_intvl.push_back(it->second); - } - - const std::vector<std::vector<MMCifInfoTransOpPtr> >& bu_op_list = - bu.GetOperations(); - for(auto i = bu_op_list.begin(); i != bu_op_list.end(); ++i) { - std::vector<geom::Mat4> mat_list; - for(auto j = i->begin(); j != i->end(); ++j) { - geom::Mat4 m; - m.PasteRotation((*j)->GetMatrix()); - m.PasteTranslation((*j)->GetVector()); - mat_list.push_back(m); - } - operations.push_back(mat_list); - } - - const std::vector<std::pair<int, int> >& bu_op_intvl = - bu.GetOperationsIntervalList(); - for(auto it = bu_op_intvl.begin(); it != bu_op_intvl.end(); ++it) { - op_intvl.push_back(it->first); - op_intvl.push_back(it->second); - } -} - -void BioUnitDefinition::ToStream(std::ostream& stream) const { - Dump(stream, au_chains); - Dump(stream, chain_intvl); - uint32_t size = operations.size(); - stream.write(reinterpret_cast<char*>(&size), sizeof(uint32_t)); - for(auto it = operations.begin(); it != operations.end(); ++it) { - Dump(stream, *it); - } - Dump(stream, op_intvl); -} - -void BioUnitDefinition::FromStream(std::istream& stream) { - Load(stream, au_chains); - Load(stream, chain_intvl); - uint32_t size = 0; - stream.read(reinterpret_cast<char*>(&size), sizeof(uint32_t)); - operations.resize(size); - for(uint i = 0; i < size; ++i) { - Load(stream, operations[i]); - } - Load(stream, op_intvl); -} - ChainData::ChainData(const ost::mol::ChainHandle& chain, const std::vector<ResidueDefinition>& residue_definitions, const std::unordered_map<unsigned long, int>& res_idx_map, @@ -4934,18 +4794,6 @@ OMFPtr OMF::FromEntity(const ost::mol::EntityHandle& ent, return omf; } -OMFPtr OMF::FromMMCIF(const ost::mol::EntityHandle& ent, - const MMCifInfo& info, - uint8_t options) { - - OMFPtr p = OMF::FromEntity(ent, options); - const std::vector<MMCifInfoBioUnit>& biounits = info.GetBioUnits(); - for(auto it = biounits.begin(); it != biounits.end(); ++it) { - p->biounit_definitions_.push_back(BioUnitDefinition(*it)); - } - return p; -} - OMFPtr OMF::FromFile(const String& fn) { std::ifstream in_stream(fn.c_str(), std::ios::binary); if (!in_stream) { @@ -5014,67 +4862,6 @@ ost::mol::EntityHandle OMF::GetAUChain(const String& name) const{ return ent; } -ost::mol::EntityHandle OMF::GetBU(int bu_idx) const{ - if(bu_idx < 0 || bu_idx >= static_cast<int>(biounit_definitions_.size())) { - throw ost::Error("Invalid biounit idx"); - } - - const BioUnitDefinition& bu = biounit_definitions_[bu_idx]; - ost::mol::EntityHandle ent = mol::CreateEntity(); - std::stringstream ss; - ss << name_ << " " << bu_idx; - ent.SetName(ss.str()); - ost::mol::XCSEditor ed = ent.EditXCS(mol::BUFFERED_EDIT); - - std::vector<String> au_chain_names; - std::vector<geom::Mat4> transforms; - - // The code below is pure magic and heavily inspired by - // the biounit buildup in modules/io/pymod/__init__.py - int n_intervals = bu.chain_intvl.size() / 2; - for(int intvl_idx = 0; intvl_idx < n_intervals; ++intvl_idx) { - std::vector<geom::Mat4> rts; - int op_start = bu.op_intvl[2*intvl_idx]; - int op_end = bu.op_intvl[2*intvl_idx+1]; - int n_intv_ops = op_end - op_start; - if(n_intv_ops) { - for(auto it = bu.operations[op_start].begin(); - it != bu.operations[op_start].end(); ++it) { - rts.push_back(*it); - } - ++op_start; - while(op_start < op_end) { - std::vector<geom::Mat4> tmp_rts; - for(auto i = bu.operations[op_start].begin(); - i != bu.operations[op_start].end(); ++i) { - for(auto j = rts.begin(); j != rts.end(); ++j) { - tmp_rts.push_back((*j)*(*i)); - } - } - rts = tmp_rts; - ++op_start; - } - } - for(int ch_idx = bu.chain_intvl[2*intvl_idx]; - ch_idx < bu.chain_intvl[2*intvl_idx+1]; ++ch_idx) { - for(auto it = rts.begin(); it != rts.end(); ++it) { - au_chain_names.push_back(bu.au_chains[ch_idx]); - transforms.push_back(*it); - } - } - } - - ChainNameGenerator gen; - for(uint bu_ch_idx = 0; bu_ch_idx < au_chain_names.size(); ++bu_ch_idx) { - String bu_ch_name = gen.Get(); - ost::mol::ChainHandle added_chain = ed.InsertChain(bu_ch_name); - this->FillChain(added_chain, ed, chain_data_.at(au_chain_names[bu_ch_idx]), - transforms[bu_ch_idx]); - } - - return ent; -} - void OMF::ToStream(std::ostream& stream) const { uint32_t magic_number = 42; @@ -5099,7 +4886,6 @@ void OMF::ToStream(std::ostream& stream) const { Dump(stream, residue_definitions_); } - Dump(stream, biounit_definitions_); Dump(stream, chain_data_, residue_definitions_, OptionSet(LOSSY), OptionSet(AVG_BFACTORS), OptionSet(ROUND_BFACTORS), OptionSet(SKIP_SS), OptionSet(INFER_POS)); @@ -5143,7 +4929,6 @@ void OMF::FromStream(std::istream& stream) { Load(stream, residue_definitions_); } - Load(stream, biounit_definitions_); Load(stream, chain_data_, residue_definitions_, version_, OptionSet(LOSSY), OptionSet(AVG_BFACTORS), OptionSet(ROUND_BFACTORS), OptionSet(SKIP_SS), OptionSet(INFER_POS)); diff --git a/modules/io/src/mol/omf.hh b/modules/io/src/mol/omf.hh index c912bcec35525078d6310240f7bf12770027d539..aa5eb6c2d0d2233d58611be7b323f17dba185caf 100644 --- a/modules/io/src/mol/omf.hh +++ b/modules/io/src/mol/omf.hh @@ -33,11 +33,9 @@ namespace ost { namespace io { const int OMF_VERSION = 2; class ChainData; -class BioUnitData; class OMF; typedef boost::shared_ptr<OMF> OMFPtr; typedef boost::shared_ptr<ChainData> ChainDataPtr; -typedef boost::shared_ptr<BioUnitData> BioUnitDataPtr; struct SidechainAtomRule { int sidechain_atom_idx; @@ -120,23 +118,6 @@ struct ResidueDefinition { std::vector<SidechainAtomRule> sidechain_atom_rules; }; - -struct BioUnitDefinition { - BioUnitDefinition() { } - - BioUnitDefinition(const ost::io::MMCifInfoBioUnit& bu); - - void ToStream(std::ostream& stream) const; - - void FromStream(std::istream& stream); - - std::vector<String> au_chains; - std::vector<int> chain_intvl; - std::vector<std::vector<geom::Mat4> > operations; - std::vector<int> op_intvl; -}; - - struct ChainData { ChainData(): ch_name(""), chain_type(ost::mol::CHAINTYPE_UNKNOWN) { } @@ -210,10 +191,6 @@ public: static OMFPtr FromEntity(const ost::mol::EntityHandle& ent, uint8_t options = 0); - static OMFPtr FromMMCIF(const ost::mol::EntityHandle& ent, - const MMCifInfo& info, - uint8_t options = 0); - static OMFPtr FromFile(const String& fn); static OMFPtr FromString(const String& s); @@ -224,8 +201,16 @@ public: ost::mol::EntityHandle GetAU() const; + ost::mol::EntityHandle GetEntity() const { + return this->GetAU(); + } + ost::mol::EntityHandle GetAUChain(const String& name) const; + ost::mol::EntityHandle GetEntityChain(const String& name) const { + return this->GetAUChain(name); + } + ost::mol::EntityHandle GetBU(int bu_idx) const; int GetVersion() const { return version_; } @@ -261,7 +246,6 @@ private: String name_; std::vector<ResidueDefinition> residue_definitions_; - std::vector<BioUnitDefinition> biounit_definitions_; std::map<String, ChainDataPtr> chain_data_; // bond features - only for bonds that are inter-chain diff --git a/modules/io/tests/test_io_omf.py b/modules/io/tests/test_io_omf.py index 8d519c5084969316d2aeab844aeac45cdd89d8bd..81c19f2e889bbd5dd9601bd69e0ae23bbbedc665 100644 --- a/modules/io/tests/test_io_omf.py +++ b/modules/io/tests/test_io_omf.py @@ -118,17 +118,16 @@ class TestOMF(unittest.TestCase): self.ent.SetName("This is a name 123") def test_AU(self): - omf = io.OMF.FromMMCIF(self.ent, self.info) + omf = io.OMF.FromEntity(self.ent) omf_bytes = omf.ToBytes() loaded_omf = io.OMF.FromBytes(omf_bytes) loaded_ent = loaded_omf.GetAU() self.assertTrue(compare_ent(self.ent, loaded_ent)) def test_default_peplib(self): - omf = io.OMF.FromMMCIF(self.ent, self.info) + omf = io.OMF.FromEntity(self.ent) omf_bytes = omf.ToBytes() - omf_def_pep = io.OMF.FromMMCIF(self.ent, self.info, - io.OMFOption.DEFAULT_PEPLIB) + omf_def_pep = io.OMF.FromEntity(self.ent, io.OMFOption.DEFAULT_PEPLIB) omf_def_pep_bytes = omf_def_pep.ToBytes() loaded_omf_def_pep = io.OMF.FromBytes(omf_def_pep_bytes) loaded_ent = loaded_omf_def_pep.GetAU() @@ -137,10 +136,9 @@ class TestOMF(unittest.TestCase): self.assertTrue(compare_ent(self.ent, loaded_ent)) def test_lossy(self): - omf = io.OMF.FromMMCIF(self.ent, self.info) + omf = io.OMF.FromEntity(self.ent) omf_bytes = omf.ToBytes() - omf_lossy = io.OMF.FromMMCIF(self.ent, self.info, - io.OMFOption.LOSSY) + omf_lossy = io.OMF.FromEntity(self.ent, io.OMFOption.LOSSY) omf_lossy_bytes = omf_lossy.ToBytes() loaded_omf_lossy = io.OMF.FromBytes(omf_lossy_bytes) loaded_ent = loaded_omf_lossy.GetAU() @@ -152,10 +150,9 @@ class TestOMF(unittest.TestCase): at_dist_thresh=max_dist)) def test_avg_bfactors(self): - omf = io.OMF.FromMMCIF(self.ent, self.info) + omf = io.OMF.FromEntity(self.ent) omf_bytes = omf.ToBytes() - omf_avg_bfac = io.OMF.FromMMCIF(self.ent, self.info, - io.OMFOption.AVG_BFACTORS) + omf_avg_bfac = io.OMF.FromEntity(self.ent, io.OMFOption.AVG_BFACTORS) omf_avg_bfac_bytes = omf_avg_bfac.ToBytes() loaded_omf_avg_bfac = io.OMF.FromBytes(omf_avg_bfac_bytes) loaded_ent = loaded_omf_avg_bfac.GetAU() @@ -174,10 +171,9 @@ class TestOMF(unittest.TestCase): self.assertTrue(abs(a.b_factor - exp_bfac) < 0.008) def test_round_bfactors(self): - omf = io.OMF.FromMMCIF(self.ent, self.info) + omf = io.OMF.FromEntity(self.ent) omf_bytes = omf.ToBytes() - omf_round_bfac = io.OMF.FromMMCIF(self.ent, self.info, - io.OMFOption.ROUND_BFACTORS) + omf_round_bfac = io.OMF.FromEntity(self.ent, io.OMFOption.ROUND_BFACTORS) omf_round_bfac_bytes = omf_round_bfac.ToBytes() loaded_omf_round_bfac = io.OMF.FromBytes(omf_round_bfac_bytes) loaded_ent = loaded_omf_round_bfac.GetAU() @@ -188,10 +184,9 @@ class TestOMF(unittest.TestCase): at_bfactor_thresh=0.5)) def test_skip_ss(self): - omf = io.OMF.FromMMCIF(self.ent, self.info) + omf = io.OMF.FromEntity(self.ent) omf_bytes = omf.ToBytes() - omf_skip_ss = io.OMF.FromMMCIF(self.ent, self.info, - io.OMFOption.SKIP_SS) + omf_skip_ss = io.OMF.FromEntity(self.ent, io.OMFOption.SKIP_SS) omf_skip_ss_bytes = omf_skip_ss.ToBytes() loaded_omf_skip_ss = io.OMF.FromBytes(omf_skip_ss_bytes) loaded_ent = loaded_omf_skip_ss.GetAU() @@ -201,10 +196,10 @@ class TestOMF(unittest.TestCase): self.assertTrue(compare_ent(self.ent, loaded_ent, skip_ss=True)) def test_infer_pep_bonds(self): - omf = io.OMF.FromMMCIF(self.ent, self.info) + omf = io.OMF.FromEntity(self.ent) omf_bytes = omf.ToBytes() - omf_infer_pep_bonds = io.OMF.FromMMCIF(self.ent, self.info, - io.OMFOption.INFER_PEP_BONDS) + omf_infer_pep_bonds = io.OMF.FromEntity(self.ent, + io.OMFOption.INFER_PEP_BONDS) omf_infer_pep_bonds_bytes = omf_infer_pep_bonds.ToBytes() loaded_omf_infer_pep_bonds = io.OMF.FromBytes(omf_infer_pep_bonds_bytes) loaded_ent = loaded_omf_infer_pep_bonds.GetAU() @@ -212,40 +207,6 @@ class TestOMF(unittest.TestCase): self.assertTrue(len(omf_infer_pep_bonds_bytes) < len(omf_bytes)) self.assertTrue(compare_ent(self.ent, loaded_ent)) - def test_multiple_BU(self): - ent, seqres, info = io.LoadMMCIF("testfiles/mmcif/3imj.cif.gz", - seqres=True, - info=True) - - omf = io.OMF.FromMMCIF(ent, info) - omf_bytes = omf.ToBytes() - omf_loaded = io.OMF.FromBytes(omf_bytes) - - # there are quite some discrepancies between PDBize and OMF - # - chain names: PDBize has specific chain names for ligands and - # water etc. OMF just iterates A, B, C, D, ... - # - skip_bonds: Thats qualified atom name based. PDBize used rnums - # and insertion codes for waters... - # - skip_rnums: Again, insertion codes for waters... - self.assertTrue(compare_ent(info.GetBioUnits()[0].PDBize(ent), - omf_loaded.GetBU(0), - skip_cnames=True, skip_bonds=True, - skip_rnums=True, bu_idx = 0)) - - self.assertTrue(compare_ent(info.GetBioUnits()[1].PDBize(ent), - omf_loaded.GetBU(1), - skip_cnames=True, skip_bonds=True, - skip_rnums=True, bu_idx = 1)) - - # no check for the full guy... problem: PDBize throws all water - # molecules in the same chain, whereas OMF keeps them separate - # as in the chains from the assymetric unit... maybe needs some - # thinking on how to resolve discrepancies between PDBize and OMF - #self.assertTrue(compare_ent(omf_loaded.GetBU(2), - # info.GetBioUnits()[2].PDBize(ent), - # skip_cnames=True, skip_bonds=True, - # skip_rnums=True)) - if __name__== '__main__': from ost import testutils if testutils.DefaultCompoundLibIsSet():