diff --git a/CMakeLists.txt b/CMakeLists.txt index bc5d8264183ab24a9b9d819a82f8147a2dae8134..7b8f348b2246f4603a093b45ca3fa8d473fbff1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -283,8 +283,9 @@ endif() add_subdirectory(modules) add_subdirectory(scripts) add_subdirectory(deployment) - set(FILES_TO_BE_REMOVED ${CMAKE_SOURCE_DIR}/stage tests) +add_subdirectory(tools) +set(FILES_TO_BE_REMOVED CMakeFiles stage tests) set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${FILES_TO_BE_REMOVED}") diff --git a/modules/conop/pymod/export_non_standard.cc b/modules/conop/pymod/export_non_standard.cc index 025d2819a100584c85f3b088ce5320fdf02d0762..50b1fd2d6e169e31ff5e192e03b19b98541bf89d 100644 --- a/modules/conop/pymod/export_non_standard.cc +++ b/modules/conop/pymod/export_non_standard.cc @@ -39,12 +39,15 @@ object copy_non_conserved_handle(ResidueHandle src_res, ResidueHandle dst_res, return make_tuple(ret, has_cbeta); } +bool (*copy_residue_handle)(ost::mol::ResidueHandle, + ost::mol::ResidueHandle, + ost::mol::XCSEditor&)=&CopyResidue; void export_NonStandard() { def("CopyNonConserved",©_non_conserved_handle); def("CopyConserved", copy_conserved_handle); - def("CopyResidue", &CopyResidue); + def("CopyResidue", copy_residue_handle); } diff --git a/modules/conop/src/CMakeLists.txt b/modules/conop/src/CMakeLists.txt index 344f5e00755d3ba733f1d6a877840037c8da802b..8f493e83e2a8daea025925fce3ac64b33cd7ea7a 100644 --- a/modules/conop/src/CMakeLists.txt +++ b/modules/conop/src/CMakeLists.txt @@ -4,6 +4,8 @@ builder_fw.hh conop.hh heuristic_builder.hh amino_acids.hh +diag.hh +model_check.hh compound.hh compound_lib.hh module_config.hh @@ -16,6 +18,8 @@ set(OST_CONOP_SOURCES builder.cc amino_acids.cc conop.cc +diag.cc +model_check.cc heuristic_builder.cc compound.cc compound_lib.cc diff --git a/modules/conop/src/nonstandard.cc b/modules/conop/src/nonstandard.cc index b8f09f283e7676be010b2a1b0720b60001536354..eeccdb60d16425dd1107629e41fb075100a7c0c3 100644 --- a/modules/conop/src/nonstandard.cc +++ b/modules/conop/src/nonstandard.cc @@ -37,11 +37,22 @@ namespace ost { namespace conop { bool CopyResidue(ResidueHandle src_res, ResidueHandle dst_res, XCSEditor& edi) +{ + // first let's get our hands on the component library + conop::BuilderP builder=conop::Conopology::Instance().GetBuilder("DEFAULT"); + conop::RuleBasedBuilderPtr rbb=dyn_cast<conop::RuleBasedBuilder>(builder); + conop::CompoundLibPtr comp_lib=rbb->GetCompoundLib(); + return CopyResidue(src_res, dst_res, edi, comp_lib); +} + +bool CopyResidue(ResidueHandle src_res, ResidueHandle dst_res, XCSEditor& edi, CompoundLibPtr comp_lib) { bool has_cbeta = false; bool ret; - if (src_res.GetName()==dst_res.GetName()) { - ret = CopyConserved(src_res, dst_res, edi, has_cbeta); + char parent_src = (comp_lib->FindCompound(src_res.GetName(),Compound::PDB))->GetOneLetterCode (); + char parent_dst = (comp_lib->FindCompound(dst_res.GetName(),Compound::PDB))->GetOneLetterCode (); + if (parent_src==parent_dst) { + ret = CopyConserved(src_res, dst_res, edi, has_cbeta, comp_lib); } else { ret = CopyNonConserved(src_res, dst_res, edi, has_cbeta); } @@ -53,8 +64,19 @@ bool CopyResidue(ResidueHandle src_res, ResidueHandle dst_res, XCSEditor& edi) return ret; } + bool CopyConserved(ResidueHandle src_res, ResidueHandle dst_res, XCSEditor& edi, bool& has_cbeta) +{ + // first let's get our hands on the component library + conop::BuilderP builder=conop::Conopology::Instance().GetBuilder("DEFAULT"); + conop::RuleBasedBuilderPtr rbb=dyn_cast<conop::RuleBasedBuilder>(builder); + conop::CompoundLibPtr comp_lib=rbb->GetCompoundLib(); + return CopyConserved(src_res,dst_res,edi,has_cbeta,comp_lib); +} + +bool CopyConserved(ResidueHandle src_res, ResidueHandle dst_res, XCSEditor& edi, + bool& has_cbeta, CompoundLibPtr comp_lib) { // check if the residue name of dst and src are the same. In the easy // case, the two residue names match and we just copy over all atoms. @@ -67,10 +89,11 @@ bool CopyConserved(ResidueHandle src_res, ResidueHandle dst_res, XCSEditor& edi, } else if (src_res.GetName()=="MSE") { return CopyMSE(src_res, dst_res, edi, has_cbeta); } else { - return CopyModified(src_res, dst_res, edi, has_cbeta); + return CopyModified(src_res, dst_res, edi, has_cbeta, comp_lib); } } + bool CopyIdentical(ResidueHandle src_res, ResidueHandle dst_res, XCSEditor& edi, bool& has_cbeta) { @@ -117,7 +140,7 @@ bool CopyMSE(ResidueHandle src_res, ResidueHandle dst_res, XCSEditor& edi, } bool CopyModified(ResidueHandle src_res, ResidueHandle dst_res, - XCSEditor& edi, bool& has_cbeta) + XCSEditor& edi, bool& has_cbeta, CompoundLibPtr comp_lib) { // FIXME: for now this functions ignores chirality changes of sidechain // chiral atoms. To detect those changes, we would need to store the @@ -128,11 +151,6 @@ bool CopyModified(ResidueHandle src_res, ResidueHandle dst_res, // doesn't have. If these two requirements are not met, we fall back to // CopyNonConserved. - // first let's get our hands on the component library - conop::BuilderP builder=conop::Conopology::Instance().GetBuilder("DEFAULT"); - conop::RuleBasedBuilderPtr rbb=dyn_cast<conop::RuleBasedBuilder>(builder); - conop::CompoundLibPtr comp_lib=rbb->GetCompoundLib(); - CompoundPtr src_compound=comp_lib->FindCompound(src_res.GetName(), Compound::PDB); if (!src_compound) { diff --git a/modules/conop/src/nonstandard.hh b/modules/conop/src/nonstandard.hh index 50a214715b518e6f5c2893e8fbf9c968b5296b37..374694303e1d5d0f05626e7cc43d2f9c53139c5a 100644 --- a/modules/conop/src/nonstandard.hh +++ b/modules/conop/src/nonstandard.hh @@ -23,14 +23,14 @@ Author: Marco Biasini, Juergen Haas */ #include "module_config.hh" - +#include "compound_lib.hh" namespace ost { namespace conop { -/// \brief copies all atom of src_res to dst_res +/// \brief copies all atom of src_res to dst_res, gets compound lib from builder /// \param has_cbeta will be set to true if the src_res has a cbeta and the /// dst_residue is not a glycine, it will be inserted if in the dst should /// be one and in src it was not present @@ -39,18 +39,39 @@ namespace ost { namespace conop { bool DLLEXPORT_OST_CONOP CopyResidue(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi); - -/// \brief copies all atom of src_res to dst_res + +/// \brief copies all atom of src_res to dst_res, requires a compound lib /// \param has_cbeta will be set to true if the src_res has a cbeta and the -/// dst_residue is not a glycine +/// dst_residue is not a glycine, it will be inserted if in the dst should +/// be one and in src it was not present +bool DLLEXPORT_OST_CONOP CopyResidue(ost::mol::ResidueHandle src_res, + ost::mol::ResidueHandle dst_res, + ost::mol::XCSEditor& edi, CompoundLibPtr lib); +/// \brief copies all atom of src_res to dst_res +/// \param has_cbeta will be set to true if the src_res has a cbeta and the +/// dst_residue is not a glycine bool DLLEXPORT_OST_CONOP CopyIdentical(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); -/// \brief copies atoms of src_res to dst_res +/// \brief copies atoms of src_res to dst_res, gets compound lib from builder +/// +/// src_res and dst_res are thought to be conserved, e.g. the parent standard +/// amino acid of both residues is the same. This includes cases where e.g. the +/// src_rs is and MSE and the dst_res is a MET. This function automatically +/// tries to do the right thing an keep as many atoms as possible from src_res + + + +bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, + ost::mol::ResidueHandle dst_res, + ost::mol::XCSEditor& edi, + bool& has_cbeta, CompoundLibPtr lib); + +/// \brief copies atoms of src_res to dst_res, requires compound lib /// /// src_res and dst_res are thought to be conserved, e.g. the parent standard /// amino acid of both residues is the same. This includes cases where e.g. the @@ -64,6 +85,7 @@ bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, ost::mol::XCSEditor& edi, bool& has_cbeta); + /// \brief construct dst_res in case src_res and dst_res are not conserved. /// /// This essentially copies the backbone of src_res to dst_res. The CB atom is @@ -88,7 +110,7 @@ bool DLLEXPORT_OST_CONOP CopyMSE(ost::mol::ResidueHandle src_res, bool DLLEXPORT_OST_CONOP CopyModified(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, - bool& has_cbeta); + bool& has_cbeta, CompoundLibPtr lib); diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..b6289985b62ad1b6fac0d51c1d1e6976684430a7 --- /dev/null +++ b/tools/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(molck) diff --git a/tools/molck/CMakeLists.txt b/tools/molck/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..99e68ec2c9919373cf2ed50ebdb89a9b70e4ca6b --- /dev/null +++ b/tools/molck/CMakeLists.txt @@ -0,0 +1,2 @@ +executable(NAME molck SOURCES main.cc + DEPENDS_ON ost_io) diff --git a/tools/molck/main.cc b/tools/molck/main.cc new file mode 100644 index 0000000000000000000000000000000000000000..613383213c6df6be4fe097f15a7faefaab854991 --- /dev/null +++ b/tools/molck/main.cc @@ -0,0 +1,310 @@ +#include <unistd.h> +#include <boost/program_options.hpp> +#include <boost/filesystem.hpp> +#include <ost/platform.hh> +#include <ost/conop/model_check.hh> +#include <ost/conop/conop.hh> +#include <ost/conop/amino_acids.hh> +#include <ost/io/mol/pdb_reader.hh> +#include <ost/io/mol/pdb_writer.hh> +#include <ost/io/io_exception.hh> +#include <ost/conop/nonstandard.hh> +using namespace ost; +using namespace ost::conop; +using namespace ost::mol; +using namespace ost::io; + +namespace po=boost::program_options; +namespace fs=boost::filesystem; + +EntityHandle load_x(const String& file, const IOProfile& profile) +{ + try { + PDBReader reader(file, profile); + if (reader.HasNext()) { + EntityHandle ent=CreateEntity(); + reader.Import(ent); + return ent; + } + std::cerr << "ERROR: '" << file << "' does not contain any ATOM records. " + << "Are you sure this is a PDB file?" << std::endl; + return EntityHandle(); + } catch (std::exception& e) { + std::cerr << "ERROR: " << e.what() << std::endl; + return EntityHandle(); + } +} + +void usage() +{ + std::cerr << "usage: molck file1.pdb [file2.pdb [...]]" << std::endl; + exit(0); +} + +int main(int argc, const char *argv[]) +{ + if (argc<2) { + usage(); + } + IOProfile prof; + String rm; + String color; + bool colored = false; + + char result[ 1024 ]; + ssize_t count = readlink( "/proc/self/exe", result, 1024 ); + String exe_path = std::string( result, (count > 0) ? count : 0 ); + + + CompoundLibPtr lib=CompoundLib::Load("compounds.chemlib"); + + #if !(defined(__APPLE__)) + + if (count==0) { + std::cerr << "Could not determine the path of the molck executable. Will only look for compounds.chemlib in the current working directory" << std::endl; + } else { + fs::path path_and_exe(exe_path); + fs::path path_only=path_and_exe.parent_path(); + fs::path share_path = path_only.parent_path(); + share_path/="share"; + share_path/="openstructure"; + share_path/="compounds.chemlib"; + + #if BOOST_FILESYSTEM_VERSION==3 || BOOST_VERSION<103400 + String share_path_string=share_path.string(); + #else + String share_path_string=share_path.file_string(); + #endif + + lib=CompoundLib::Load(share_path_string); + } + + #endif + if (!lib) { + std::cerr << "Could not load compounds.chemlib" << std::endl; + exit(-1); + } + bool rm_unk_atoms=false; + bool rm_hyd_atoms=false; + bool rm_non_std=false; + bool rm_oxt_atoms=false; + bool rm_zero_occ_atoms=false; + bool write_to_stdout = false; + bool map_nonstd_res = false; + bool assign_elem = false; + po::options_description desc("Options"); + desc.add_options() + ("rm", po::value<String>(&rm)->default_value("hyd"), "atoms to be removed") + ("color", po::value<String>(&color)->default_value("auto"), + "whether the output should be colored.") + ("files", po::value< std::vector<String> >(), "input file(s)") + ("stdout", "write cleaned structure to stdout") + ("map-nonstd", "map non standard residues back to standard ones (e.g.: MSE->MET,SEP->SER,etc.)") + ("assign-elem", "insert element") + ; + po::positional_options_description p; + p.add("files", -1); + std::vector<String> files; + po::variables_map vm; + + try { + po::store(po::command_line_parser(argc, argv). + options(desc).positional(p).run(), + vm); + } catch (std::exception& e) { + std::cout << e.what() << std::endl; + usage(); + exit(-1); + } + po::notify(vm); + if (vm.count("files")) { + files = vm["files"].as<std::vector<String> >(); + } else { + usage(); + exit(-1); + } + if (vm.count("stdout")) { + write_to_stdout = true; + } + if (vm.count("map-nonstd")) { + map_nonstd_res = true; + } + if (vm.count("assign-elem")) { + assign_elem = true; + } + + std::vector<StringRef> rms=StringRef(rm.c_str(), rm.size()).split(','); + for (size_t i=0; i<rms.size(); ++i) { + if (rms[i] == StringRef("unk", 3)) { + rm_unk_atoms = true; + } else if (rms[i] == StringRef("nonstd", 6)) { + rm_non_std = true; + } else if (rms[i] == StringRef("hyd", 3)) { + rm_hyd_atoms = true; + } else if (rms[i] == StringRef("oxt", 3)) { + rm_oxt_atoms = true; + } else if (rms[i] == StringRef("zeroocc", 7)) { + rm_zero_occ_atoms = true; + } else { + std::cerr << "unknown value to remove '" << rms[i] << "'" << std::endl; + usage(); + exit(-1); + } + } + if (color=="auto") { + colored = isatty(STDERR_FILENO); + } else if (color == "on" || color == "1" || color == "yes") { + colored = true; + } else if (color == "off" || color == "0" || color == "no") { + colored = false; + } else { + usage(); + exit(-1); + } + for (unsigned int i = 0; i < files.size(); ++i) { + EntityHandle ent=load_x(files[i], prof); + if (!ent.IsValid()) { + continue; + } + if (map_nonstd_res) { + EntityHandle new_ent=CreateEntity(); + ChainHandleList chains=ent.GetChainList(); + XCSEditor new_edi=new_ent.EditXCS(); + for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { + ChainHandle new_chain = new_edi.InsertChain(c->GetName()); + ResidueHandleList residues = c->GetResidueList(); + for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { + AminoAcid aa = ResidueToAminoAcid(*r); + if (aa!=XXX) { + ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); + AtomHandleList atoms = r->GetAtomList(); + for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { + new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); + } + continue; + } else { + CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); + if (!compound || !compound->IsPeptideLinking() || compound->GetChemClass()==ChemClass::D_PEPTIDE_LINKING || + OneLetterCodeToAminoAcid(compound->GetOneLetterCode())==XXX) { + ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); + AtomHandleList atoms = r->GetAtomList(); + for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { + new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); + } + continue; + } + ResidueHandle dest_res = new_edi.AppendResidue(new_chain,OneLetterCodeToResidueName(compound->GetOneLetterCode()),r->GetNumber()); + std::cout << " I am here" << std::endl; + + CopyResidue(*r,dest_res,new_edi,lib); + } + } + } + ent=new_ent; + } + + XCSEditor edi=ent.EditXCS(); + DiagEngine diags; + Checker checker(lib, ent, diags); + if (rm_zero_occ_atoms) { + std::cerr << "removing zero occupancy atoms" << std::endl; + int zremoved=0; + AtomHandleList zero_atoms=checker.GetZeroOccupancy(); + for (AtomHandleList::const_iterator i=zero_atoms.begin(), e=zero_atoms.end(); i!=e; ++i) { + edi.DeleteAtom(*i); + zremoved++; + } + std::cerr << " --> removed " << zremoved << " hydrogen atoms" << std::endl; + } + + if (rm_hyd_atoms) { + std::cerr << "removing hydrogen atoms" << std::endl; + int hremoved=0; + AtomHandleList hyd_atoms=checker.GetHydrogens(); + for (AtomHandleList::const_iterator i=hyd_atoms.begin(), e=hyd_atoms.end(); i!=e; ++i) { + edi.DeleteAtom(*i); + hremoved++; + } + std::cerr << " --> removed " << hremoved << " hydrogen atoms" << std::endl; + } + if (rm_oxt_atoms) { + std::cerr << "removing OXT atoms" << std::endl; + int oremoved=0; + AtomHandleList atoms=ent.GetAtomList(); + for (AtomHandleList::const_iterator i=atoms.begin(), e=atoms.end(); i!=e; ++i) { + if (i->GetName()=="OXT") { + edi.DeleteAtom(*i); + oremoved++; + } + } + std::cerr << " --> removed " << oremoved << " OXT atoms" << std::endl; + } + + if (rm_non_std) { + std::cerr << "removing OXT atoms" << std::endl; + int oremoved=0; + AtomHandleList atoms=ent.GetAtomList(); + for (AtomHandleList::const_iterator i=atoms.begin(), e=atoms.end(); i!=e; ++i) { + if (i->GetName()=="OXT") { + edi.DeleteAtom(*i); + oremoved++; + } + } + std::cerr << " --> removed " << oremoved << " OXT atoms" << std::endl; + } + + checker.CheckForCompleteness(); + checker.CheckForUnknownAtoms(); + checker.CheckForNonStandard(); + for (size_t j=0; j<diags.GetDiags().size(); ++j) { + const Diag* diag=diags.GetDiags()[j]; + std::cerr << diag->Format(colored); + switch (diag->GetType()) { + case DIAG_UNK_ATOM: + if (rm_unk_atoms) { + edi.DeleteAtom(diag->GetAtom(0)); + std::cerr << " --> removed "; + } + break; + case DIAG_NONSTD_RESIDUE: + if (rm_non_std) { + edi.DeleteResidue(diag->GetResidue(0)); + std::cerr << " --> removed "; + } + break; + default: + break; + } + std::cerr << std::endl; + } + if (assign_elem) { + ChainHandleList chains=ent.GetChainList(); + for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { + ResidueHandleList residues = c->GetResidueList(); + for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { + CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); + AtomHandleList atoms=r->GetAtomList(); + if (!compound) { + for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { + j->SetElement(""); + } + continue; + } + for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { + int specindx=compound->GetAtomSpecIndex(j->GetName()); + if (specindx!=-1) { + j->SetElement(compound->GetAtomSpecs()[specindx].element); + } else { + j->SetElement(""); + } + } + } + } + } + if (write_to_stdout) { + PDBWriter writer(std::cout, prof); + writer.Write(ent); + } + } + return 0; +}