Skip to content
Snippets Groups Projects
Commit 3808f0e2 authored by Rafal Gumienny's avatar Rafal Gumienny
Browse files

fix: SCHWED-3121 Perform consistency checks by default - breaking change:...

fix: SCHWED-3121 Perform consistency checks by default - breaking change: consistency-checks to ignore-consistency-checks
parent 3619149f
Branches
Tags
No related merge requests found
...@@ -73,7 +73,7 @@ from ost.io import (LoadPDB, LoadMMCIF, SavePDB, MMCifInfoBioUnit, MMCifInfo, ...@@ -73,7 +73,7 @@ from ost.io import (LoadPDB, LoadMMCIF, SavePDB, MMCifInfoBioUnit, MMCifInfo,
MMCifInfoTransOp, ReadStereoChemicalPropsFile) MMCifInfoTransOp, ReadStereoChemicalPropsFile)
from ost import PushVerbosityLevel from ost import PushVerbosityLevel
from ost.mol.alg import (qsscoring, Molck, MolckSettings, lDDTSettings, from ost.mol.alg import (qsscoring, Molck, MolckSettings, lDDTSettings,
CheckStructure) CheckStructure, ResidueNamesMatch)
from ost.conop import CompoundLib from ost.conop import CompoundLib
...@@ -214,10 +214,10 @@ def _ParseArgs(): ...@@ -214,10 +214,10 @@ def _ParseArgs():
nargs="+", nargs="+",
type=lambda x: x.split(":"), type=lambda x: x.split(":"),
dest="chain_mapping", dest="chain_mapping",
help=("Mapping of chains between the model and the reference.\n" help=("Mapping of chains between the reference and the model.\n"
"Each separate mapping consist of key:value pairs where key\n" "Each separate mapping consist of key:value pairs where key\n"
"is the chain name in model and value is the chain name in\n" "is the chain name in reference and value is the chain name in\n"
"reference.")) "model."))
parser.add_argument( parser.add_argument(
"-rna", "-rna",
"--residue-number-alignment", "--residue-number-alignment",
...@@ -304,12 +304,19 @@ def _ParseArgs(): ...@@ -304,12 +304,19 @@ def _ParseArgs():
"separation is higher than the provided parameter are\n" "separation is higher than the provided parameter are\n"
"considered when computing the score")) "considered when computing the score"))
parser.add_argument( parser.add_argument(
"-cc", "-ic",
"--consistency-checks", "--ignore-consistency-checks",
dest="consistency_checks", dest="ignore_consistency_checks",
default=False, default=False,
action="store_true", action="store_true",
help=("Residue name consistency checks.")) help=("Ignore consistency checks. By default residue name\n"
"consistency between a model-reference pair would be checked\n"
"and an error will be rised if no valid pair is found (For\n"
"mmCIF there could be more than one biounit in one file).\n"
"The pair that does not conform to the check will be skipped.\n"
"If the option is selected consistency checks will also be\n"
"performed but only a warning will be shown and the pair will\b"
"evaluated."))
parser.add_argument( parser.add_argument(
"-spr", "-spr",
"--save-per-residue-scores", "--save-per-residue-scores",
...@@ -397,9 +404,15 @@ def _ParseArgs(): ...@@ -397,9 +404,15 @@ def _ParseArgs():
def _RevertChainNames(ent): def _RevertChainNames(ent):
"""Revert chain names to original names.""" """Revert chain names to original names.
By default the first chain with given name will not have any number
attached to it ie. if there are two chains mapping to chain A the resulting
chain names will be: A and A2.
"""
editor = ent.EditXCS() editor = ent.EditXCS()
suffix = "_tmp" # just a suffix for temporary chain name suffix = "_tmp" # just a suffix for temporary chain name
separator = "" # dot causes selection error
used_names = dict() used_names = dict()
reverted_chains = dict() reverted_chains = dict()
for chain in ent.chains: for chain in ent.chains:
...@@ -414,12 +427,13 @@ def _RevertChainNames(ent): ...@@ -414,12 +427,13 @@ def _RevertChainNames(ent):
continue continue
new_name = original_name new_name = original_name
if new_name not in used_names: if new_name not in used_names:
used_names[original_name] = 1 used_names[original_name] = 2
reverted_chains[chain.name] = new_name reverted_chains[chain.name] = new_name
editor.RenameChain(chain, chain.name + suffix) editor.RenameChain(chain, chain.name + suffix)
else: else:
new_name = "%s_%i" % (original_name, # dot causes selection error new_name = "%s%s%i" % (original_name,
used_names[original_name]) separator,
used_names[original_name])
reverted_chains[chain.name] = new_name reverted_chains[chain.name] = new_name
editor.RenameChain(chain, chain.name + suffix) editor.RenameChain(chain, chain.name + suffix)
used_names[original_name] += 1 used_names[original_name] += 1
...@@ -429,6 +443,15 @@ def _RevertChainNames(ent): ...@@ -429,6 +443,15 @@ def _RevertChainNames(ent):
ost.LogInfo("Reverted chains: %s" % ", ".join(rev_out)) ost.LogInfo("Reverted chains: %s" % ", ".join(rev_out))
def _CheckConsistency(reference, model, chain_mapping, log_error):
    """Check residue name consistency for all mapped chain pairs.

    For every ``reference chain name -> model chain name`` entry of the
    mapping, the two chains are selected from their structures and compared
    with ``ResidueNamesMatch``.

    :param reference: Reference structure; only its ``Select`` method is used.
    :param model: Model structure; only its ``Select`` method is used.
    :param chain_mapping: Dictionary mapping chain names of the reference to
                          chain names of the model.
    :param log_error: Forwarded unchanged as third argument to
                      ``ResidueNamesMatch`` (presumably selects whether
                      mismatches are reported as errors - confirm against
                      the ``ost.mol.alg`` documentation).
    :returns: True if ``ResidueNamesMatch`` succeeded for every chain pair
              (also for an empty mapping), False if at least one pair
              failed.
    """
    is_cons = True
    # items() instead of iteritems(): works with Python 2 and Python 3.
    for ref_cname, mdl_cname in chain_mapping.items():
        ref_chain = reference.Select("cname=%s" % ref_cname)
        mdl_chain = model.Select("cname=%s" % mdl_cname)
        # Accumulate the result instead of overwriting it, otherwise only
        # the last chain pair would decide the outcome. Every pair is still
        # evaluated (no short-circuit) so each mismatch gets reported.
        if not ResidueNamesMatch(ref_chain, mdl_chain, log_error):
            is_cons = False
    return is_cons
def _ReadStructureFile(path): def _ReadStructureFile(path):
"""Safely read structure file into OST entity. """Safely read structure file into OST entity.
...@@ -566,6 +589,7 @@ def _Main(): ...@@ -566,6 +589,7 @@ def _Main():
result["options"]["cwd"] = os.path.abspath(os.getcwd()) result["options"]["cwd"] = os.path.abspath(os.getcwd())
# #
# Perform scoring # Perform scoring
skipped = list()
for model in models: for model in models:
model_name = model.GetName() model_name = model.GetName()
model_results = dict() model_results = dict()
...@@ -584,6 +608,39 @@ def _Main(): ...@@ -584,6 +608,39 @@ def _Main():
"Using custom chain mapping: %s" % str( "Using custom chain mapping: %s" % str(
opts.chain_mapping)) opts.chain_mapping))
qs_scorer.chain_mapping = opts.chain_mapping qs_scorer.chain_mapping = opts.chain_mapping
else:
qs_scorer.chain_mapping # just to initialize it
ost.LogInfo("-" * 80)
ost.LogInfo("Checking consistency between %s and %s" % (
model_name, reference_name))
is_cons = _CheckConsistency(
reference,
model,
qs_scorer.chain_mapping,
not opts.ignore_consistency_checks)
if not opts.ignore_consistency_checks:
if not is_cons:
msg = (("Residue names in model %s and in reference "
"%s are inconsistent. Skipping.") % (
model_name,
reference_name))
ost.LogError(msg)
skipped.append(True)
continue
else:
ost.LogInfo("Consistency check: OK")
skipped.append(False)
else:
skipped.append(False)
if not is_cons:
msg = (("Residue names in model %s and in reference "
"%s are inconsistent.\nThis might lead to "
"corrupted results.") % (
model_name,
reference_name))
ost.LogWarning(msg)
else:
ost.LogInfo("Consistency check: OK")
if opts.qs_score: if opts.qs_score:
ost.LogInfo("-" * 80) ost.LogInfo("-" * 80)
ost.LogInfo("Computing QS-score") ost.LogInfo("Computing QS-score")
...@@ -622,8 +679,8 @@ def _Main(): ...@@ -622,8 +679,8 @@ def _Main():
radius=opts.inclusion_radius, radius=opts.inclusion_radius,
sequence_separation=opts.sequence_separation, sequence_separation=opts.sequence_separation,
sel=opts.selection, sel=opts.selection,
structural_checks=False, structural_checks=False, # These are performed elsewhere
consistency_checks=opts.consistency_checks, consistency_checks=False, # These are performed elsewhere
label="lddt") label="lddt")
ost.LogInfo("lDDT settings: ") ost.LogInfo("lDDT settings: ")
ost.LogInfo(str(lddt_settings).rstrip()) ost.LogInfo(str(lddt_settings).rstrip())
...@@ -725,6 +782,7 @@ def _Main(): ...@@ -725,6 +782,7 @@ def _Main():
reference_results["lddt"] = lddt_results reference_results["lddt"] = lddt_results
model_results[reference_name] = reference_results model_results[reference_name] = reference_results
if opts.dump_structures: if opts.dump_structures:
ost.LogInfo("-" * 80)
ref_output_path = os.path.join( ref_output_path = os.path.join(
os.path.dirname(opts.reference), os.path.dirname(opts.reference),
reference_name + opts.dump_suffix) reference_name + opts.dump_suffix)
...@@ -747,7 +805,12 @@ def _Main(): ...@@ -747,7 +805,12 @@ def _Main():
ost.LogError("Cannot save model: %s" % str(ex)) ost.LogError("Cannot save model: %s" % str(ex))
result["result"][model_name] = model_results result["result"][model_name] = model_results
if all(skipped) and len(skipped) > 0:
raise RuntimeError("Consistency check failed for all model-reference "
"pairs.")
if opts.output is not None: if opts.output is not None:
ost.LogInfo("#" * 80)
ost.LogInfo("Saving output into %s" % opts.output)
with open(opts.output, "w") as outfile: with open(opts.output, "w") as outfile:
outfile.write(json.dumps(result, indent=4)) outfile.write(json.dumps(result, indent=4))
......
...@@ -566,6 +566,9 @@ void lDDTScorer::_Init(){ ...@@ -566,6 +566,9 @@ void lDDTScorer::_Init(){
throw std::runtime_error(errstr.str()); throw std::runtime_error(errstr.str());
} }
} }
if (settings.consistency_checks) {
_CheckConsistency();
}
} }
Real lDDTScorer::GetGlobalScore(){ Real lDDTScorer::GetGlobalScore(){
...@@ -641,13 +644,9 @@ bool lDDTScorer::IsValid(){ ...@@ -641,13 +644,9 @@ bool lDDTScorer::IsValid(){
void lDDTScorer::_CheckConsistency(){ void lDDTScorer::_CheckConsistency(){
for (std::vector<EntityView>::const_iterator ref_list_it = references_view.begin(); for (std::vector<EntityView>::const_iterator ref_list_it = references_view.begin();
ref_list_it != references_view.end(); ++ref_list_it) { ref_list_it != references_view.end(); ++ref_list_it) {
bool cons_check = ResidueNamesMatch(model_view, *ref_list_it, settings.consistency_checks); bool cons_check = ResidueNamesMatch(model_view, *ref_list_it, true);
if (cons_check == false) { if (cons_check == false) {
if (settings.consistency_checks == true) { throw std::runtime_error("Residue names in model and in reference structure(s) are inconsistent.");
throw std::runtime_error("Residue names in model and in reference structure(s) are inconsistent.");
} else {
LOG_WARNING("Residue names in model and in reference structure(s) are inconsistent.");
}
} }
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment