Something went wrong on our end
lddt.cc 18.06 KiB
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2011 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#include <iomanip>
#if defined (_MSC_VER)
#define BOOST_ALL_DYN_LINK 1
#endif
#include <boost/program_options.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/filesystem/convenience.hpp>
#include <ost/base.hh>
#include <ost/boost_filesystem_helper.hh>
#include <ost/mol/alg/local_dist_diff_test.hh>
#include <ost/mol/alg/filter_clashes.hh>
#include <ost/io/mol/pdb_reader.hh>
#include <ost/io/io_exception.hh>
#include <ost/conop/conop.hh>
#include <ost/string_ref.hh>
#include <ost/conop/amino_acids.hh>
#include <ost/mol/iterator.hh>
#include <ost/platform.hh>
#include <ost/log.hh>
#include <ost/mol/alg/consistency_checks.hh>
#include <ost/conop/rule_based_builder.hh>
#include <ost/dyn_cast.hh>
using namespace ost;
using namespace ost::io;
using namespace ost::mol;
using namespace ost::mol::alg;
namespace po=boost::program_options;
// loads a file
EntityHandle load(const String& file, const IOProfile& profile)
{
try {
PDBReader reader(file, profile);
if (reader.HasNext()) {
EntityHandle ent=CreateEntity();
reader.Import(ent);
conop::Conopology& conop_inst=conop::Conopology::Instance();
conop_inst.GetBuilder()->SetBondFeasibilityCheck(profile.bond_feasibility_check);
conop_inst.ConnectAll(conop_inst.GetBuilder(), ent);
if (ent.GetChainList().size()!=1) {
std::cout << "WARNING: File " << file << " has more than one chain" << std::endl;
}
return ent;
}
std::cerr << "ERROR: '" << file << "' does not contain any ATOM records. "
<< "Are you sure this is a PDB file?" << std::endl;
return EntityHandle();
} catch (io::IOException& e) {
std::cerr << "ERROR: " << e.what() << std::endl;
return EntityHandle();
}
}
// prints usage output
void usage()
{
std::cerr << "usage: lddt [options] <mod1> [mod1 [mod2]] <re1>[,ref2,ref3]" << std::endl;
std::cerr << " -s selection performed on ref" << std::endl;
std::cerr << " -c use Calphas only" << std::endl;
std::cerr << " -f perform structural checks and filter input data" << std::endl;
std::cerr << " -t fault tolerant parsing" << std::endl;
std::cerr << " -p <file> use specified parmeter file. Mandatory" << std::endl;
std::cerr << " -v <level> verbosity level (0=results only,1=problems reported, 2=full report)" << std::endl;
std::cerr << " -b <value> tolerance in stddevs for bonds" << std::endl;
std::cerr << " -a <value> tolerance in stddevs for angles" << std::endl;
std::cerr << " -r <value> distance inclusion radius" << std::endl;
std::cerr << " -i <value> sequence separation" << std::endl;
std::cerr << " -e print version" << std::endl;
std::cerr << " -x ignore residue name consistency checks" << std::endl;
}
// computes coverage
std::pair<int,int> compute_coverage (const EntityView& v,const GlobalRDMap& glob_dist_list)
{
int second=0;
int first=0;
if (v.GetResidueList().size()==0) {
if (glob_dist_list.size()==0) {
return std::make_pair<int,int>(0,-1);
} else {
return std::make_pair<int,int>(0,glob_dist_list.size());
}
}
ChainView vchain=v.GetChainList()[0];
for (GlobalRDMap::const_iterator i=glob_dist_list.begin();i!=glob_dist_list.end();++i)
{
ResNum rnum = (*i).first;
second++;
if (vchain.FindResidue(rnum)) {
first++;
}
}
return std::make_pair<int,int>(first,second);
}
bool is_resnum_in_globalrdmap(const ResNum& resnum, const GlobalRDMap& glob_dist_list)
{
for (GlobalRDMap::const_iterator i=glob_dist_list.begin(), e=glob_dist_list.end(); i!=e; ++i) {
ResNum rn = i->first;
if (rn==resnum) {
return true;
}
}
return false;
}
int main (int argc, char **argv)
{
// sets some default values for parameters
String version = "1.2";
Real bond_tolerance = 12.0;
Real angle_tolerance = 12.0;
Real radius=15.0;
int sequence_separation = 0;
// creates the required loading profile
IOProfile profile;
profile.bond_feasibility_check=false;
// parses options
String sel;
bool structural_checks=false;
bool consistency_checks=true;
po::options_description desc("Options");
desc.add_options()
("calpha,c", "consider only calpha atoms")
("sel,s", po::value<String>(&sel)->default_value(""), "selection performed on reference structure")
("tolerant,t", "fault tolerant mode")
("structural-checks,f", "perform stereo-chemical and clash checks")
("ignore-consistency-checks,x", "ignore residue name consistency checks")
("version,e", "version")
("parameter-file,p", po::value<String>(), "stereo-chemical parameter file")
("verbosity,v", po::value<int>(), "verbosity level")
("bond_tolerance,b", po::value<Real>(), "tolerance in stddev for bonds")
("angle_tolerance,a", po::value<Real>(), "tolerance in stddev for angles")
("inclusion_radius,r", po::value<Real>(), "distance inclusion radius")
("sequence_separation,i", po::value<int>(), "sequence separation")
("files", po::value< std::vector<String> >(), "input file(s)")
("reference",po::value<String>(),"reference(s)")
;
po::positional_options_description p;
p.add("files", -1);
po::variables_map vm;
try {
po::store(po::command_line_parser(argc, argv).
options(desc).positional(p).run(),
vm);
} catch (std::exception& e) {
std::cout << e.what() << std::endl;
usage();
exit(-1);
}
po::notify(vm);
if (vm.count("version")) {
std::cout << "Version: " << version << std::endl;
exit(0);
}
std::vector<String> files;
if (vm.count("files")) {
files=vm["files"].as<std::vector<String> >();
} else {
usage();
exit(-1);
}
if (vm.count("calpha")) {
profile.calpha_only=true;
}
if (vm.count("structural-checks")) {
structural_checks=true;
}
if (vm.count("ignore-consistency-checks")) {
consistency_checks=false;
}
if (vm.count("tolerant")) {
profile.fault_tolerant=true;
}
String parameter_filename;
if (vm.count("parameter-file")) {
parameter_filename=vm["parameter-file"].as<String>();
} else if (structural_checks==true) {
std::cout << "Please specify a stereo-chemical parameter file" << std::endl;
exit(-1);
}
int verbosity_level=0;
int ost_verbosity_level=2;
if (vm.count("verbosity")) {
verbosity_level=vm["verbosity"].as<int>();
if (verbosity_level==0) {
ost_verbosity_level=2;
} else if (verbosity_level==1) {
ost_verbosity_level=3;
} else if (verbosity_level==2) {
ost_verbosity_level=4;
} else {
std::cout << "Verbosity level " << verbosity_level << " is not available" << std::endl;
exit(-1);
}
}
Logger::Instance().PushVerbosityLevel(ost_verbosity_level);
if (vm.count("bond_tolerance")) {
bond_tolerance=vm["bond_tolerance"].as<Real>();
}
if (vm.count("angle_tolerance")) {
angle_tolerance=vm["angle_tolerance"].as<Real>();
}
if (vm.count("inclusion_radius")) {
radius=vm["inclusion_radius"].as<Real>();
}
if (vm.count("sequence_separation")) {
sequence_separation=vm["sequence_separation"].as<int>();
}
std::vector<Real> cutoffs;
cutoffs.push_back(0.5);
cutoffs.push_back(1.0);
cutoffs.push_back(2.0);
cutoffs.push_back(4.0);
std::vector<EntityView> ref_list;
// loads the reference file and creates the list of distances to check in lddt
// if the reference file is a comma-separated list of files, switches to multi-
// reference mode
GlobalRDMap glob_dist_list;
String ref_file=files.back();
ost::StringRef ref_file_sr(ref_file.c_str(),ref_file.length());
std::vector<StringRef> ref_file_split_sr=ref_file_sr.split(',');
if (ref_file_split_sr.size()==1) {
std::cout << "Multi-reference mode: Off" << std::endl;
String ref_filename = ref_file_split_sr[0].str();
EntityHandle ref=load(ref_filename, profile);
if (!ref) {
exit(-1);
}
EntityView refview=ref.GetChainList()[0].Select("peptide=true");
ref_list.push_back(refview);
glob_dist_list = CreateDistanceList(refview,radius);
} else {
std::cout << "Multi-reference mode: On" << std::endl;
for (std::vector<StringRef>::const_iterator ref_file_split_sr_it = ref_file_split_sr.begin();
ref_file_split_sr_it != ref_file_split_sr.end();++ref_file_split_sr_it) {
String ref_filename = ref_file_split_sr_it->str();
EntityHandle ref=load(ref_filename, profile);
if (!ref) {
exit(-1);
}
if (! ref_list.empty()) {
if (ref_list[0].GetChainList()[0].GetName()!=ref.GetChainList()[0].GetName()) {
std::cout << "ERROR: First chains in the reference structures have different names" << std::endl;
exit(-1);
}
}
EntityView refview=ref.GetChainList()[0].Select("peptide=true");
ref_list.push_back(ref.CreateFullView());
}
glob_dist_list = CreateDistanceListFromMultipleReferences (ref_list,cutoffs,sequence_separation,radius);
}
files.pop_back();
// prints out parameters used in the lddt calculation
std::cout << "Verbosity level: " << verbosity_level << std::endl;
if (structural_checks) {
std::cout << "Stereo-chemical and steric clash checks: On " << std::endl;
} else {
std::cout << "Stereo-chemical and steric clash checks: Off " << std::endl;
}
std::cout << "Inclusion Radius: " << radius << std::endl;
std::cout << "Sequence separation: " << sequence_separation << std::endl;
if (structural_checks) {
std::cout << "Parameter filename: " << parameter_filename << std::endl;
std::cout << "Tolerance in stddevs for bonds: " << bond_tolerance << std::endl;
std::cout << "Tolerance in stddevs for angles: " << angle_tolerance << std::endl;
LOG_INFO("Log entries format:");
LOG_INFO("BOND INFO FORMAT: Chain Residue ResNum Bond Min Max Observed Z-score Status");
LOG_INFO("ANGLE INFO FORMAT: Chain Residue ResNum Angle Min Max Observed Z-score Status");
LOG_INFO("CLASH INFO FORMAT: Chain1 Residue1 ResNum1 Atom1 Chain2 Residue2 ResNum2 Atom2 Observed Difference Status");
}
LOG_INFO("LDDT INFO FORMAT: Chain1 Residue1 ResNum1 Atom1 Chain2 Residue2 ResNum2 Atom2 ModelDist TargetDist Difference Tolerance Status");
// cycles through the models to evaluate
for (size_t i=0; i<files.size(); ++i) {
EntityHandle model=load(files[i], profile);
if (!model) {
if (!profile.fault_tolerant) {
exit(-1);
}
continue;
}
EntityView v=model.GetChainList()[0].Select("peptide=true");
EntityView outv=model.GetChainList()[0].Select("peptide=true");
for (std::vector<EntityView>::const_iterator ref_list_it = ref_list.begin();
ref_list_it != ref_list.end(); ++ref_list_it) {
bool cons_check = ResidueNamesMatch(v,*ref_list_it,consistency_checks);
if (cons_check==false) {
if (consistency_checks==true) {
LOG_ERROR("Residue names in model: " << files[i] << " and in reference structure(s) are inconsistent.");
exit(-1);
} else {
LOG_WARNING("Residue names in model: " << files[i] << " and in reference structure(s) are inconsistent.");
}
}
}
boost::filesystem::path pathstring(files[i]);
String filestring=BFPathToString(pathstring);
std::cout << "File: " << files[i] << std::endl;
std::pair<int,int> cov = compute_coverage(v,glob_dist_list);
if (cov.second == -1) {
std::cout << "Coverage: 0 (0 out of 0 residues)" << std::endl;
} else {
std::cout << "Coverage: " << (float(cov.first)/float(cov.second)) << " (" << cov.first << " out of " << cov.second << " residues)" << std::endl;
}
if (structural_checks) {
// reads in parameter files
boost::filesystem::path loc(parameter_filename);
boost::filesystem::ifstream infile(loc);
if (!infile) {
std::cout << "Could not find " << parameter_filename << std::endl;
exit(-1);
}
std::vector<String> stereo_chemical_props;
String line;
while (std::getline(infile, line))
{
std::stringstream line_stream(line);
stereo_chemical_props.push_back(line);
}
StereoChemicalParams bond_table = FillStereoChemicalParams("Bond",stereo_chemical_props);
if (bond_table.IsEmpty()) {
std::cout << "Error reading the Bond section of the stereo-chemical parameter file." << std::endl;
exit(-1);
}
StereoChemicalParams angle_table = FillStereoChemicalParams("Angle",stereo_chemical_props);
if (angle_table.IsEmpty()) {
std::cout << "Error reading the Angles section of the stereo-chemical parameter file." << std::endl;
exit(-1);
}
ClashingDistances nonbonded_table = FillClashingDistances(stereo_chemical_props);
if (nonbonded_table.IsEmpty()) {
std::cout << "Error reading the Clashing section of the stereo-chemical parameter file." << std::endl;
exit(-1);
}
// performs structural checks and filters the structure
try {
v=alg::CheckStereoChemistry(v,bond_table,angle_table,bond_tolerance,angle_tolerance);
} catch (std::exception& e) {
std::cout << "An error occurred during the structure quality checks, stage 1:" << std::endl;
std::cout << e.what() << std::endl;
exit(-1);
}
try {
v=alg::FilterClashes(v,nonbonded_table);
} catch (std::exception& e) {
std::cout << "An error occurred during the structure quality checks, stage 2:" << std::endl;
std::cout << e.what() << std::endl;
exit(-1);
}
}
if (cov.first==0) {
std::cout << "Global LDDT score: 0.0" << std::endl;
return 0;
}
// computes the lddt score
String label="localldt";
std::pair<int,int> total_ov=alg::LocalDistDiffTest(v, glob_dist_list, cutoffs, sequence_separation, label);
Real lddt = static_cast<Real>(total_ov.first)/(static_cast<Real>(total_ov.second) ? static_cast<Real>(total_ov.second) : 1);
std::cout << "Global LDDT score: " << std::setprecision(4) << lddt << std::endl;
std::cout << "(" << std::fixed << total_ov.first << " conserved distances out of " << total_ov.second
<< " checked, over " << cutoffs.size() << " thresholds)" << std::endl;
// prints the residue-by-residue statistics
if (structural_checks) {
std::cout << "Local LDDT Scores:" << std::endl;
std::cout << "(A 'Yes' in the 'Quality Problems' column stands for problems" << std::endl;
std::cout << "in the side-chain of a residue, while a 'Yes+' for problems" << std::endl;
std::cout << "in the backbone)" << std::endl;
} else {
std::cout << "Local LDDT Scores:" << std::endl;
}
if (structural_checks) {
std::cout << "Chain\tResName\tResNum\tAsses.\tQ.Prob.\tScore\t(Conserved/Total, over " << cutoffs.size() << " thresholds)" << std::endl;
} else {
std::cout << "Chain\tResName\tResNum\tAsses.\tScore\t(Conserved/Total, over " << cutoffs.size() << " thresholds)" << std::endl;
}
for (ResidueViewIter rit=outv.ResiduesBegin();rit!=outv.ResiduesEnd();++rit){
ResidueView ritv=*rit;
ResNum rnum = ritv.GetNumber();
bool assessed = false;
String assessed_string="No";
bool quality_problems = false;
String quality_problems_string="No";
Real lddt_local = -1;
String lddt_local_string="-";
int conserved_dist = -1;
int total_dist = -1;
String dist_string = "-";
if (is_resnum_in_globalrdmap(rnum,glob_dist_list)) {
assessed = true;
assessed_string="Yes";
}
if (ritv.HasProp("stereo_chemical_violation_sidechain") || ritv.HasProp("steric_clash_sidechain")) {
quality_problems = true;
quality_problems_string="Yes";
}
if (ritv.HasProp("stereo_chemical_violation_backbone") || ritv.HasProp("steric_clash_backbone")) {
quality_problems = true;
quality_problems_string="Yes+";
}
if (assessed==true) {
if (ritv.HasProp(label)) {
lddt_local=ritv.GetFloatProp(label);
std::stringstream stkeylddt;
stkeylddt << std::fixed << std::setprecision(4) << lddt_local;
lddt_local_string=stkeylddt.str();
conserved_dist=ritv.GetIntProp(label+"_conserved");
total_dist=ritv.GetIntProp(label+"_total");
std::stringstream stkeydist;
stkeydist << "("<< conserved_dist << "/" << total_dist << ")";
dist_string=stkeydist.str();
} else {
lddt_local = 0;
lddt_local_string="0.0000";
conserved_dist = 0;
total_dist = 0;
dist_string="(0/0)";
}
}
if (structural_checks) {
std::cout << ritv.GetChain() << "\t" << ritv.GetName() << "\t" << ritv.GetNumber() << '\t' << assessed_string << '\t' << quality_problems_string << '\t' << lddt_local_string << "\t" << dist_string << std::endl;
} else {
std::cout << ritv.GetChain() << "\t" << ritv.GetName() << "\t" << ritv.GetNumber() << '\t' << assessed_string << '\t' << lddt_local_string << "\t" << dist_string << std::endl;
}
}
std::cout << std::endl;
}
return 0;
}