diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index 265f868f24815905318db728434bfcd4c3008bb5..6b4d05b816fbda4db340a72bf4faabb46b937e17 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -52,6 +52,8 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=False, """ Load PDB file from disk and returns one or more entities. Several options allow to customize the exact behaviour of the PDB import. + + Residues are flagged as ligand if they are mentioned in a HET record. :param restrict_chains: If not an empty string, only chains listed in the string will be imported. diff --git a/modules/io/src/mol/pdb_reader.cc b/modules/io/src/mol/pdb_reader.cc index b3059d49f3ac9f01a39e1c4a3ec082b3712488b4..0e3c60c4035629b153b68816452c7f7dcec6d926 100644 --- a/modules/io/src/mol/pdb_reader.cc +++ b/modules/io/src/mol/pdb_reader.cc @@ -47,7 +47,7 @@ bool IEquals(const StringRef& a, const StringRef& b) return false; } for (size_t i=0; i<a.size(); ++i) { - if (toupper(a[i])!=toupper(b[i])) { + if (toupper(a[i])!=b[i]) { return false; } } @@ -113,7 +113,8 @@ bool PDBReader::HasNext() IEquals(curr_line.substr(0, 6),StringRef("ANISOU ", 6)) || IEquals(curr_line.substr(0, 6), StringRef("SHEET ", 6)) || IEquals(curr_line.substr(0, 6), StringRef("HELIX ", 6)) || - IEquals(curr_line.substr(0, 6), StringRef("MODEL ", 6)))) { + IEquals(curr_line.substr(0, 6), StringRef("MODEL ", 6)) || + IEquals(curr_line.substr(0, 6), StringRef("HET ", 6)))) { return true; } else if (IEquals(curr_line.rtrim(), StringRef("END", 3))) { hard_end_=true; @@ -188,6 +189,21 @@ void PDBReader::Import(mol::EntityHandle& ent, if (!(PDB::Flags() & PDB::CHARMM_FORMAT)) { this->ParseHelixEntry(curr_line); } + } else if (IEquals(curr_line.substr(0, 6), StringRef("HET ", 6))) { + // remember het entry to mark the residues as ligand during ATOM import + char chain=curr_line[12]; + std::pair<bool, int> num=curr_line.substr(13, 4).ltrim().to_int(); + if (!num.first) { + if (PDB::Flags() && PDB::SKIP_FAULTY_RECORDS) { + LOG_WARNING("Invalid HET entry on line " << line_num_); + continue; + } else { + String msg=str(format("Invalid HET entry on line %d")%line_num_); + throw IOException(msg); + } + } + hets_.push_back(HetEntry(chain, to_res_num(num.second, + curr_line[17]))); } break; case 'M': @@ -231,6 +247,12 @@ void PDBReader::Import(mol::EntityHandle& ent, << helix_list_.size() << " helices and " << strand_list_.size() << " strands"); this->AssignSecStructure(ent); + for (HetList::const_iterator i=hets_.begin(), e=hets_.end(); i!=e; ++i) { + mol::ResidueHandle res=ent.FindResidue(String(1, i->chain), i->num); + if (res.IsValid()) { + res.SetIsLigand(true); + } + } } @@ -285,6 +307,7 @@ void PDBReader::ClearState() hard_end_=false; helix_list_.clear(); strand_list_.clear(); + hets_.clear(); } bool PDBReader::EnsureLineLength(const StringRef& line, size_t size) diff --git a/modules/io/src/mol/pdb_reader.hh b/modules/io/src/mol/pdb_reader.hh index 2c2ca2c1a16eadd110ad79aca78dcf31e8afc06b..f18258a73b03c3dfb6ea2b92ebace31154d89075 100644 --- a/modules/io/src/mol/pdb_reader.hh +++ b/modules/io/src/mol/pdb_reader.hh @@ -38,7 +38,13 @@ class DLLEXPORT_OST_IO PDBReader { mol::ResNum end; String chain; }; + struct HetEntry { + HetEntry(char c, mol::ResNum n): chain(c), num(n) {} + char chain; + mol::ResNum num; + }; typedef std::vector<HSEntry> HSList; + typedef std::vector<HetEntry> HetList; public: PDBReader(const String& filename); PDBReader(const boost::filesystem::path& loc); @@ -77,12 +83,11 @@ private: String restrict_chains_; HSList helix_list_; HSList strand_list_; - boost::filesystem::ifstream infile_; std::istream& instream_; boost::iostreams::filtering_stream<boost::iostreams::input> in_; String curr_line_; - + HetList hets_; // this needs to be set to true for reading pqr // file (i.e. pdb formatted file with charges in occupacy // column, and radii in b-factor column) diff --git a/modules/io/tests/test_io_pdb.cc b/modules/io/tests/test_io_pdb.cc index 292495a1de60d07b195da26d54a81e132c3e6eb5..8789bd82f2dd26f6446e23d0f5fcf02ea9059e7a 100644 --- a/modules/io/tests/test_io_pdb.cc +++ b/modules/io/tests/test_io_pdb.cc @@ -143,6 +143,15 @@ BOOST_AUTO_TEST_CASE(calpha_only_import_on) BOOST_CHECK_EQUAL(ent.GetAtomCount(), 1); } +BOOST_AUTO_TEST_CASE(het_import) +{ + String fname("testfiles/pdb/het.pdb"); + PDBReader reader(fname); + mol::EntityHandle ent=mol::CreateEntity(); + reader.Import(ent); + BOOST_CHECK_EQUAL(ent.Select("ligand=true").GetResidueCount(), 1); +} + BOOST_AUTO_TEST_CASE(calpha_only_import_off) { String fname("testfiles/pdb/calpha.pdb"); diff --git a/modules/io/tests/testfiles/pdb/het.pdb b/modules/io/tests/testfiles/pdb/het.pdb new file mode 100644 index 0000000000000000000000000000000000000000..f8fa944aa32acf2922dab16061d777d3a0ef42ce --- /dev/null +++ b/modules/io/tests/testfiles/pdb/het.pdb @@ -0,0 +1,13 @@ +HET AP5 A1215C 64 +ATOM 20 N ILE A 3 26.039 48.836 36.139 1.00 21.59 N +ATOM 21 CA ILE A 3 24.961 47.988 35.671 1.00 23.90 C +ATOM 22 C ILE A 3 25.374 47.080 34.537 1.00 21.12 C +ATOM 23 O ILE A 3 26.029 47.614 33.642 1.00 24.59 O +ATOM 24 CB ILE A 3 23.802 48.880 35.202 1.00 23.84 C +ATOM 25 CG1 ILE A 3 23.317 49.724 36.378 1.00 26.14 C +ATOM 26 CG2 ILE A 3 22.660 48.010 34.642 1.00 19.29 C +ATOM 27 CD1 ILE A 3 22.436 50.890 35.992 1.00 24.97 C +HETATM 3320 PA AP5 A1215C 18.089 46.955 20.531 1.00 17.77 P +HETATM 3321 O1A AP5 A1215C 17.885 47.954 21.576 1.00 16.47 O +HETATM 3322 O2A AP5 A1215C 18.847 47.325 19.359 1.00 15.16 O +END diff --git a/modules/mol/base/pymod/export_residue.cc b/modules/mol/base/pymod/export_residue.cc index ef844831cb222c399cf5a4405803be9224c2c48e..66892c8de2dae95f99a73714b2a0ff3b4dfb6e20 100644 --- a/modules/mol/base/pymod/export_residue.cc +++ b/modules/mol/base/pymod/export_residue.cc @@ -120,6 +120,9 @@ void export_Residue() .def("GetChemClass", &ResidueBase::GetChemClass) .def("SetChemClass", set_chemclass1) .def("SetChemClass", set_chemclass2) + .add_property("is_ligand", &ResidueBase::IsLigand, &ResidueBase::SetIsLigand) + .def("IsLigand", &ResidueBase::IsLigand) + .def("SetIsLigand", &ResidueBase::SetIsLigand) .add_property("number", make_function(&ResidueBase::GetNumber, return_value_policy<copy_const_reference>())) @@ -161,7 +164,7 @@ void export_Residue() .add_property("atom_count", &ResidueHandle::GetAtomCount) .add_property("index", &ResidueHandle::GetIndex) .def("Select", select_string, arg("flags")=0) - .def("Select", select_query, arg("flags")=0) + .def("Select", select_query, arg("flags")=0) .def("GetMass", &ResidueHandle::GetMass) .def("GetCenterOfMass", &ResidueHandle::GetCenterOfMass) .def("GetCenterOfAtoms", &ResidueHandle::GetCenterOfAtoms) diff --git a/modules/mol/base/src/impl/chain_impl.cc b/modules/mol/base/src/impl/chain_impl.cc index 64fed74185c0efa3759a64780c6634035232eeea..4d5313a38fbafd97e9fb0fa658febeb71743b107 100644 --- a/modules/mol/base/src/impl/chain_impl.cc +++ b/modules/mol/base/src/impl/chain_impl.cc @@ -67,6 +67,7 @@ ResidueImplPtr ChainImpl::AppendResidue(const ResidueImplPtr& res) dst_res->SetSecStructure(res->GetSecStructure()); dst_res->SetChemClass(res->GetChemClass()); dst_res->SetProtein(res->IsProtein()); + dst_res->SetIsLigand(res->IsLigand()); return dst_res; } diff --git a/modules/mol/base/src/impl/residue_impl.cc b/modules/mol/base/src/impl/residue_impl.cc index 1f2fe7faf12ffed48227845659186bc20e02c7f7..8a92cb01e83638f40d30f73536fade13e29141fd 100644 --- a/modules/mol/base/src/impl/residue_impl.cc +++ b/modules/mol/base/src/impl/residue_impl.cc @@ -42,8 +42,10 @@ ResidueImpl::ResidueImpl(const EntityImplPtr& ent, key_(key), atom_list_(), sec_structure_(SecStructure::COIL), - olc_('?') -{} + olc_('?'), + protein_(false), ligand_(false) +{ +} AtomImplPtr ResidueImpl::InsertAtom(const String& name, diff --git a/modules/mol/base/src/impl/residue_impl.hh b/modules/mol/base/src/impl/residue_impl.hh index a9f8481d2304d501d8a3565d183a044f811ffccf..925204982bcd1ecaabf08e7ef3e1f7479073fd41 100644 --- a/modules/mol/base/src/impl/residue_impl.hh +++ b/modules/mol/base/src/impl/residue_impl.hh @@ -202,6 +202,9 @@ public: void SetProtein(bool protein) { protein_=protein; } bool IsProtein() const { return protein_; } + + bool IsLigand() const { return ligand_; } + void SetIsLigand(bool flag) { ligand_=flag; } private: void AddAltAtom(const String& group, const AtomImplPtr& atom, const geom::Vec3& position); @@ -219,6 +222,7 @@ private: char olc_; // whether the residue is part of the protein. bool protein_; + bool ligand_; }; }}} // ns diff --git a/modules/mol/base/src/property_id.cc b/modules/mol/base/src/property_id.cc index a6408b83e97407a26060faac0824dc0659403493..0ce93195b8d8fcb0a8bf41752661381705b5be44 100644 --- a/modules/mol/base/src/property_id.cc +++ b/modules/mol/base/src/property_id.cc @@ -51,6 +51,7 @@ struct Properties : public boost::spirit::symbols<Prop> { ("peptide", Prop(Prop::PEPTIDE, Prop::INT, Prop::RESIDUE)) ("rindex", Prop(Prop::RINDEX, Prop::INT, Prop::RESIDUE)) ("protein", Prop(Prop::PROTEIN, Prop::INT, Prop::RESIDUE)) + ("ligand", Prop(Prop::LIGAND, Prop::INT, Prop::RESIDUE)) ("water", Prop(Prop::WATER, Prop::INT, Prop::RESIDUE)) ("acharge", Prop(Prop::ACHARGE, Prop::FLOAT, Prop::ATOM)); } @@ -116,6 +117,8 @@ String Prop::GetName() const return "rindex"; case PROTEIN: return "protein"; + case LIGAND: + return "ligand"; case WATER: return "water"; default: @@ -128,7 +131,7 @@ String Prop::GetTypeName() const switch(type) { case Prop::STRING: - return "String"; + return "string"; case Prop::FLOAT: return "floating point"; case Prop::INT: diff --git a/modules/mol/base/src/property_id.hh b/modules/mol/base/src/property_id.hh index 4ea4be13fc66c96512edfca18169967602f747da..f8a541ce75798e1c2e143414dae09d6ac154db9c 100644 --- a/modules/mol/base/src/property_id.hh +++ b/modules/mol/base/src/property_id.hh @@ -40,7 +40,7 @@ public: /// respectively. typedef enum { RNAME, ANAME, CNAME, ELE, RNUM, ANUM, AX, AY, AZ, OCC, RTYPE, ISHETATM, - RBFAC, ABFAC, PEPTIDE, ACHARGE, RINDEX, PROTEIN, WATER, WITHIN, + RBFAC, ABFAC, PEPTIDE, ACHARGE, RINDEX, PROTEIN, LIGAND, WATER, WITHIN, UNDEF, CUSTOM } ID; diff --git a/modules/mol/base/src/query_state.cc b/modules/mol/base/src/query_state.cc index 03b2ea28b9236fa1088791541faf62e1a6351436..5df2d900b8b95c428e03d4a30b58e03ee2f68281 100644 --- a/modules/mol/base/src/query_state.cc +++ b/modules/mol/base/src/query_state.cc @@ -217,6 +217,10 @@ boost::logic::tribool QueryState::EvalResidue(const impl::ResidueImplPtr& r) { int_value=r->GetChemClass().IsWater(); s_[*i]=cmp_num<int>(ss.comp_op,int_value,boost::get<int>(ss.param)); break; + case Prop::LIGAND: + int_value=r->IsLigand(); + s_[*i]=cmp_num<int>(ss.comp_op,int_value,boost::get<int>(ss.param)); + break; case Prop::RTYPE: p=boost::get<String>(ss.param); if (p.length()>1) { diff --git a/modules/mol/base/src/residue_base.cc b/modules/mol/base/src/residue_base.cc index 9ae05d783a17e7885f3f1b2f8896cdcb1a7894be..2ddf33c2e52af2443e6c3527c9771f44005a4159 100644 --- a/modules/mol/base/src/residue_base.cc +++ b/modules/mol/base/src/residue_base.cc @@ -193,5 +193,17 @@ void ResidueBase::SetIsProtein(bool protein) Impl()->SetProtein(protein); } +void ResidueBase::SetIsLigand(bool ligand) +{ + this->CheckValidity(); + return Impl()->SetIsLigand(ligand); +} + +bool ResidueBase::IsLigand() const +{ + this->CheckValidity(); + return Impl()->IsLigand(); +} + }} diff --git a/modules/mol/base/src/residue_base.hh b/modules/mol/base/src/residue_base.hh index 1ad7438795fa240422afc912645ab5025c48d50d..363ff875c7022eeb2f845913f228dbde8880f8cd 100644 --- a/modules/mol/base/src/residue_base.hh +++ b/modules/mol/base/src/residue_base.hh @@ -144,6 +144,10 @@ public: bool IsProtein() const; void SetIsProtein(bool protein); + + void SetIsLigand(bool ligand); + + bool IsLigand() const; public: impl::ResidueImplPtr& Impl(); diff --git a/modules/mol/base/tests/test_entity.cc b/modules/mol/base/tests/test_entity.cc index 93114774015661befd2dca50f2e94eacea521ab3..ac262c406cc7620febc688ef57237ad646f879c4 100644 --- a/modules/mol/base/tests/test_entity.cc +++ b/modules/mol/base/tests/test_entity.cc @@ -337,6 +337,7 @@ BOOST_AUTO_TEST_CASE(copy_residue_props) ResidueHandle res=edi.AppendResidue(ch, "DUMMY", mol::ResNum(666, '6')); res.SetOneLetterCode('X'); res.SetIsProtein(true); + res.SetIsLigand(true); ChemClass cl(ChemClass::LPeptideLinking); res.SetSecStructure(SecStructure(SecStructure::ALPHA_HELIX)); res.SetChemClass(cl); @@ -348,6 +349,7 @@ BOOST_AUTO_TEST_CASE(copy_residue_props) BOOST_CHECK_EQUAL(res2.GetChemClass(), cl); BOOST_CHECK_EQUAL(res.GetSecStructure(), SecStructure::ALPHA_HELIX); BOOST_CHECK(res2.IsProtein()==true); + BOOST_CHECK(res2.IsLigand()==true); BOOST_CHECK_EQUAL(res2.GetNumber(), ResNum(666, '6')); }