diff --git a/modules/io/pymod/export_omf_io.cc b/modules/io/pymod/export_omf_io.cc index b50392dc09ce99de19489f493d1908d681501cd8..fb98237b0ffbb2e5c6e5cd6d00ceceeefde080dd 100644 --- a/modules/io/pymod/export_omf_io.cc +++ b/modules/io/pymod/export_omf_io.cc @@ -73,6 +73,7 @@ void export_omf_io() { .def("GetAU", &OMF::GetAU) .def("GetAUChain", &OMF::GetAUChain) .def("GetBU", &OMF::GetBU) + .def("GetName", &OMF::GetName) .def("GetChainNames", &wrap_get_chain_names) .def("GetPositions", &OMF::GetPositions, return_value_policy<reference_existing_object>(),(arg("cname"))) .def("GetBFactors", &OMF::GetBFactors, return_value_policy<reference_existing_object>(),(arg("cname"))) diff --git a/modules/io/src/mol/omf.cc b/modules/io/src/mol/omf.cc index f6a75c90e6516e6614556de6f2dc3f978e91a7ad..973595161ed6295631bf6e26593d9984b81b7d1e 100644 --- a/modules/io/src/mol/omf.cc +++ b/modules/io/src/mol/omf.cc @@ -174,10 +174,12 @@ namespace{ int n = 0; int abs_min = std::abs(min); for(auto it = vec.begin(); it != vec.end(); ++it) { - if(*it > max) { - n += (*it/max + 1); - } else if (*it < min) { - n += (std::abs(*it)/abs_min + 1); + if(*it >= max) { + n += *it/max; + ++n; + } else if (*it <= min) { + n += std::abs(*it)/abs_min; + ++n; } else { ++n; } @@ -186,10 +188,16 @@ namespace{ } template<typename T> - void IntegerPacking(const std::vector<int>& in, std::vector<T>& out) { + void IntegerPacking(const std::vector<int>& in, std::vector<T>& out, + int min, int max) { - int min = std::numeric_limits<T>::min(); - int max = std::numeric_limits<T>::max(); + if(min < std::numeric_limits<T>::min()) { + throw ost::Error("Invalid min val in IntegerPacking"); + } + + if(max > std::numeric_limits<T>::max()) { + throw ost::Error("Invalid max val in IntegerPacking"); + } if(max <= min) { throw ost::Error("Min max error in IntegerPackingSize"); @@ -223,22 +231,29 @@ namespace{ } template<typename T> - void IntegerUnpacking(const std::vector<T>& in, std::vector<int>& out) { - int min = std::numeric_limits<T>::min(); - int max = std::numeric_limits<T>::max(); + void IntegerUnpacking(const std::vector<T>& in, std::vector<int>& out, + int min, int max) { + + if(min < std::numeric_limits<T>::min()) { + throw ost::Error("Invalid min val in IntegerUnpacking"); + } + + if(max > std::numeric_limits<T>::max()) { + throw ost::Error("Invalid max val in IntegerUnpacking"); + } if(max <= min) { - throw ost::Error("Min max error in IntegerPackingSize"); + throw ost::Error("Min max error in IntegerUnpacking"); } // We don't allow unsigned packing here => min must be negative, // max must be positive if(min >= 0) { - throw ost::Error("Min val in IntegerPacking must be negative"); + throw ost::Error("Min val in IntegerUnpacking must be negative"); } if(max <= 0) { - throw ost::Error("Max val in IntegerPacking must be positive"); + throw ost::Error("Max val in IntegerUnpacking must be positive"); } out.clear(); @@ -354,11 +369,13 @@ namespace{ if(encoding == 8) { std::vector<int8_t> int8_vec; LoadIntVec(stream, int8_vec); - IntegerUnpacking(int8_vec, vec); + IntegerUnpacking(int8_vec, vec, std::numeric_limits<int8_t>::min(), + std::numeric_limits<int8_t>::max()); } else if(encoding == 16) { std::vector<int16_t> int16_vec; LoadIntVec(stream, int16_vec); - IntegerUnpacking(int16_vec, vec); + IntegerUnpacking(int16_vec, vec, std::numeric_limits<int16_t>::min(), + std::numeric_limits<int16_t>::max()); } else if(encoding == 32) { LoadIntVec(stream, vec); } else { @@ -390,12 +407,16 @@ namespace{ DumpIntVec(stream, vec); } else if(encoding == 16) { std::vector<int16_t> packed; - IntegerPacking(vec, packed); + IntegerPacking(vec, packed, std::numeric_limits<int16_t>::min(), + std::numeric_limits<int16_t>::max()); DumpIntVec(stream, packed); } else if(encoding == 8) { std::vector<int8_t> packed; - IntegerPacking(vec, packed); + IntegerPacking(vec, packed, std::numeric_limits<int8_t>::min(), + std::numeric_limits<int8_t>::max()); DumpIntVec(stream, packed); + } else { + throw ost::Error("AAAAAAAAaaaaaa!"); } } @@ -689,6 +710,124 @@ namespace{ void DumpResDefIndices(std::ostream& stream, const std::vector<int>& indices) { Dump(stream, indices); } + + void LoadSecStructures(std::istream& stream, + std::vector<char>& sec_structures) { + std::vector<int> run_length_encoded; + Load(stream, run_length_encoded); + std::vector<int> transformed_sec_structures; + RunLengthDecoding(run_length_encoded, transformed_sec_structures); + sec_structures.clear(); + sec_structures.reserve(transformed_sec_structures.size()); + for(auto it = transformed_sec_structures.begin(); + it != transformed_sec_structures.end(); ++it) { + switch(*it) { + case 0: { + sec_structures.push_back(ost::mol::SecStructure::ALPHA_HELIX); + break; + } + case 1: { + sec_structures.push_back(ost::mol::SecStructure::COIL); + break; + } + case 2: { + sec_structures.push_back(ost::mol::SecStructure::THREE_TEN_HELIX); + break; + } + case 3: { + sec_structures.push_back(ost::mol::SecStructure::TURN); + break; + } + case 4: { + sec_structures.push_back(ost::mol::SecStructure::EXTENDED); + break; + } + case 5: { + sec_structures.push_back(ost::mol::SecStructure::BETA_BRIDGE); + break; + } + case 6: { + sec_structures.push_back(ost::mol::SecStructure::BEND); + break; + } + case 7: { + sec_structures.push_back(ost::mol::SecStructure::PI_HELIX); + break; + } + default: { + throw ost::Error("Invalid sec structure observed"); + } + } + } + } + + void DumpSecStructures(std::ostream& stream, + const std::vector<char>& sec_structures) { + std::vector<int> transformed_sec_structures; + transformed_sec_structures.reserve(sec_structures.size()); + for(auto it = sec_structures.begin(); it != sec_structures.end(); ++it) { + switch(*it) { + case ost::mol::SecStructure::ALPHA_HELIX: { + transformed_sec_structures.push_back(0); + break; + } + case ost::mol::SecStructure::COIL: { + transformed_sec_structures.push_back(1); + break; + } + case ost::mol::SecStructure::THREE_TEN_HELIX: { + transformed_sec_structures.push_back(2); + break; + } + case ost::mol::SecStructure::TURN: { + transformed_sec_structures.push_back(3); + break; + } + case ost::mol::SecStructure::EXTENDED: { + transformed_sec_structures.push_back(4); + break; + } + case ost::mol::SecStructure::BETA_BRIDGE: { + transformed_sec_structures.push_back(5); + break; + } + case ost::mol::SecStructure::BEND: { + transformed_sec_structures.push_back(6); + break; + } + case ost::mol::SecStructure::PI_HELIX: { + transformed_sec_structures.push_back(7); + break; + } + default: { + throw ost::Error("Invalid sec structure observed"); + } + } + } + std::vector<int> run_length_encoded; + RunLengthEncoding(transformed_sec_structures, run_length_encoded); + Dump(stream, run_length_encoded); + } + + void DumpName(std::ostream& stream, const String& name) { + if(name.size() > std::numeric_limits<uint8_t>::max()) { + std::stringstream ss; + ss << "Max name size that can be dumped is "; + ss << std::numeric_limits<uint8_t>::max << ". "; + ss << "got: "<<name<<std::endl; + throw ost::Error(ss.str()); + } + uint8_t size = name.size(); + stream.write(reinterpret_cast<char*>(&size), sizeof(uint8_t)); + stream.write(reinterpret_cast<const char*>(&name[0]), size*sizeof(char)); + } + + void LoadName(std::istream& stream, String& name) { + uint8_t size; + stream.read(reinterpret_cast<char*>(&size), sizeof(uint8_t)); + name.resize(size); + stream.read(reinterpret_cast<char*>(&name[0]), size*sizeof(uint8_t)); + } } @@ -922,7 +1061,7 @@ void ChainData::ToStream(std::ostream& stream, DumpBonds(stream, bonds); DumpBondOrders(stream, bond_orders); if(!skip_ss) { - DumpIntVec(stream, sec_structures); + DumpSecStructures(stream, sec_structures); } } @@ -958,7 +1097,11 @@ void ChainData::FromStream(std::istream& stream, if(skip_ss) { sec_structures.assign(res_def_indices.size(), 'C'); } else { - LoadIntVec(stream, sec_structures); + if(version >= 2) { + LoadSecStructures(stream, sec_structures); + } else { + LoadIntVec(stream, sec_structures); + } } } @@ -3235,7 +3378,9 @@ OMFPtr OMF::FromEntity(const ost::mol::EntityHandle& ent, uint8_t options) { OMFPtr omf(new OMF); + omf->name_ = ent.GetName(); omf->options_ = options; + omf->version_ = OMF_VERSION; ////////////////////////////////////////////////////////////////////////////// // Generate kind of a "mini compound library"... Eeach unique residue gets // @@ -3415,6 +3560,7 @@ String OMF::ToString() const { ost::mol::EntityHandle OMF::GetAU() const{ ost::mol::EntityHandle ent = mol::CreateEntity(); + ent.SetName(name_); ost::mol::XCSEditor ed = ent.EditXCS(mol::BUFFERED_EDIT); for(auto it = chain_data_.begin(); it!=chain_data_.end(); ++it) { @@ -3440,6 +3586,9 @@ ost::mol::EntityHandle OMF::GetAUChain(const String& name) const{ throw ost::Error("No chain of name " + name); } ost::mol::EntityHandle ent = mol::CreateEntity(); + std::stringstream ss; + ss << name_ << " " << name; + ent.SetName(ss.str()); ost::mol::XCSEditor ed = ent.EditXCS(mol::BUFFERED_EDIT); ost::mol::ChainHandle ch = ed.InsertChain(name); this->FillChain(ch, ed, chain_data_.at(name)); @@ -3453,6 +3602,9 @@ ost::mol::EntityHandle OMF::GetBU(int bu_idx) const{ const BioUnitDefinition& bu = biounit_definitions_[bu_idx]; ost::mol::EntityHandle ent = mol::CreateEntity(); + std::stringstream ss; + ss << name_ << " " << bu_idx; + ent.SetName(ss.str()); ost::mol::XCSEditor ed = ent.EditXCS(mol::BUFFERED_EDIT); std::vector<String> au_chain_names; @@ -3511,9 +3663,10 @@ void OMF::ToStream(std::ostream& stream) const { // We set it to the current version... // If you loaded a structure from a previous version and you dump it again, // the version will be updated. - uint32_t version = OMF_VERSION; + uint32_t version = version_; stream.write(reinterpret_cast<char*>(&version), sizeof(uint32_t)); stream.write(reinterpret_cast<const char*>(&options_), sizeof(uint8_t)); + DumpName(stream, name_); if(OptionSet(DEFAULT_PEPLIB)) { // no need to dump the residue definitions from default lib @@ -3555,6 +3708,7 @@ void OMF::FromStream(std::istream& stream) { if(version_ > 1) { stream.read(reinterpret_cast<char*>(&options_), sizeof(uint8_t)); + LoadName(stream, name_); } if(OptionSet(DEFAULT_PEPLIB)) { diff --git a/modules/io/src/mol/omf.hh b/modules/io/src/mol/omf.hh index e049e53482d65124b85c26e8a1b3b969c767c858..f5bb8b9966b0b5c74d9d114f745f212c16b52799 100644 --- a/modules/io/src/mol/omf.hh +++ b/modules/io/src/mol/omf.hh @@ -190,6 +190,9 @@ public: // data access without requirement of generating a full // OpenStructure entity + + String GetName() const { return name_; } + std::vector<String> GetChainNames() const; const geom::Vec3List& GetPositions(const String& cname) const; @@ -210,6 +213,7 @@ private: const ChainDataPtr data, geom::Mat4 transform = geom::Mat4()) const; + String name_; std::vector<ResidueDefinition> residue_definitions_; std::vector<BioUnitDefinition> biounit_definitions_; std::map<String, ChainDataPtr> chain_data_; diff --git a/modules/io/tests/test_io_omf.py b/modules/io/tests/test_io_omf.py index 840d9f42d75794620b8e45b312a90ed826f2b232..1ed14044a23919d8f9624254a7d270db2d815a8d 100644 --- a/modules/io/tests/test_io_omf.py +++ b/modules/io/tests/test_io_omf.py @@ -78,7 +78,13 @@ def compare_bonds(ent1, ent2): def compare_ent(ent1, ent2, at_occupancy_thresh = 0.01, at_bfactor_thresh = 0.01, at_dist_thresh = 0.001, skip_ss=False, skip_cnames = False, skip_bonds = False, - skip_rnums=False): + skip_rnums=False, bu_idx = None): + if bu_idx is not None: + if ent1.GetName() + ' ' + str(bu_idx) != ent2.GetName(): + return False + else: + if ent1.GetName() != ent2.GetName(): + return False chain_names_one = [ch.GetName() for ch in ent1.chains] chain_names_two = [ch.GetName() for ch in ent2.chains] if skip_cnames: @@ -109,6 +115,7 @@ class TestOMF(unittest.TestCase): self.ent = ent self.seqres = seqres self.info = info + self.ent.SetName("This is a name 123") def test_AU(self): omf = io.OMF.FromMMCIF(self.ent, self.info) @@ -220,15 +227,15 @@ class TestOMF(unittest.TestCase): # - skip_bonds: Thats qualified atom name based. PDBize used rnums # and insertion codes for waters... # - skip_rnums: Again, insertion codes for waters... - self.assertTrue(compare_ent(omf_loaded.GetBU(0), - info.GetBioUnits()[0].PDBize(ent), + self.assertTrue(compare_ent(info.GetBioUnits()[0].PDBize(ent), + omf_loaded.GetBU(0), skip_cnames=True, skip_bonds=True, - skip_rnums=True)) + skip_rnums=True, bu_idx = 0)) - self.assertTrue(compare_ent(omf_loaded.GetBU(1), - info.GetBioUnits()[1].PDBize(ent), + self.assertTrue(compare_ent(info.GetBioUnits()[1].PDBize(ent), + omf_loaded.GetBU(1), skip_cnames=True, skip_bonds=True, - skip_rnums=True)) + skip_rnums=True, bu_idx = 1)) # no check for the full guy... problem: PDBize throws all water # molecules in the same chain, whereas OMF keeps them separate diff --git a/modules/mol/alg/pymod/scoring.py b/modules/mol/alg/pymod/scoring.py index 66dccf7545956e1dd1fdbe33f755a10b0791c7a6..359aa0e67b9dbe6f131d074610219a434e162f15 100644 --- a/modules/mol/alg/pymod/scoring.py +++ b/modules/mol/alg/pymod/scoring.py @@ -172,7 +172,7 @@ class Scorer: if molck_settings is None: molck_settings = MolckSettings(rm_unk_atoms=True, - rm_non_std=True, + rm_non_std=False, rm_hyd_atoms=True, rm_oxt_atoms=True, rm_zero_occ_atoms=False,