Skip to content
Snippets Groups Projects
Commit b7420f79 authored by Rafal Gumienny's avatar Rafal Gumienny
Browse files

fix: SCHWED-3121 Remove qs-score action, update documentation

parent ccf15e19
Branches
Tags
No related merge requests found
add_custom_target(actions ALL)
ost_action_init()
ost_action(ost-qs-score actions)
ost_action(ost-compare-structures actions)
......@@ -718,8 +718,6 @@ def _Main():
reference_results["qs_score"] = {
"status": "SUCCESS",
"error": "",
"model_name": model_name,
"reference_name": reference_name,
"global_score": qs_scorer.global_score,
"best_score": qs_scorer.best_score}
except qsscoring.QSscoreError as ex:
......@@ -728,8 +726,6 @@ def _Main():
reference_results["qs_score"] = {
"status": "FAILURE",
"error": str(ex),
"model_name": model_name,
"reference_name": reference_name,
"global_score": 0.0,
"best_score": 0.0}
# Calculate lDDT
......
"""Calculate Quaternary Structure score (QS-score) between two complexes."""
import os
import sys
import json
import argparse
import ost
from ost.io import (LoadPDB, LoadMMCIF, MMCifInfoBioUnit, MMCifInfo,
MMCifInfoTransOp)
from ost import PushVerbosityLevel
from ost.mol.alg import qsscoring
def _ParseArgs():
"""Parse command-line arguments."""
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description=__doc__,
prog="ost qs-score")
parser.add_argument(
'-v',
'--verbosity',
type=int,
default=3,
help="Set verbosity level.")
parser.add_argument(
"-m",
"--model",
dest="model",
required=True,
help=("Path to the model file."))
parser.add_argument(
"-r",
"--reference",
dest="reference",
required=True,
help=("Path to the reference file."))
parser.add_argument(
"-c",
"--chain-mapping",
nargs="+",
type=lambda x: x.split(":"),
dest="chain_mapping",
help=("Mapping of chains between the model and the reference. "
"Each separate mapping consist of key:value pairs where key "
"is the chain name in model and value is the chain name in "
"reference."))
parser.add_argument(
"-o",
"--output",
dest="output",
help=("Output file name. The output will be saved as a JSON file."))
opts = parser.parse_args()
if opts.chain_mapping is not None:
try:
opts.chain_mapping = dict(opts.chain_mapping)
except ValueError:
raise ValueError("Cannot parse chain mapping into dictionary. The "
"correct format is: key:value.")
return opts
def _ReadStructureFile(path):
"""Safely read structure file into OST entity.
The functin can read both PDB and mmCIF files.
:param path: Path to the file.
:type path: :class:`str`
:returns: Entity
:rtype: :class:`~ost.mol.EntityHandle`
"""
entities = list()
if not os.path.isfile(path):
raise IOError("%s is not a file" % path)
try:
entity = LoadPDB(path)
if not entity.IsValid():
raise IOError("Provided file does not contain valid entity.")
entity.SetName(os.path.basename(path))
entities.append(entity)
except Exception:
try:
tmp_entity, cif_info = LoadMMCIF(path, info=True)
if len(cif_info.biounits) == 0:
tbu = MMCifInfoBioUnit()
tbu.id = 'ASU of ' + entity.pdb_id
tbu.details = 'asymmetric unit'
for chain in tmp_entity.chains:
tbu.AddChain(str(chain))
tinfo = MMCifInfo()
tops = MMCifInfoTransOp()
tinfo.AddOperation(tops)
tbu.AddOperations(tinfo.GetOperations())
entity = tbu.PDBize(tmp_entity, min_polymer_size=0)
entity.SetName(os.path.basename(path) + ".au")
entities.append(entity)
elif len(cif_info.biounits) > 1:
for i, biounit in enumerate(cif_info.biounits):
entity = biounit.PDBize(tmp_entity, min_polymer_size=0)
if not entity.IsValid():
raise IOError(
"Provided file does not contain valid entity.")
entity.SetName(os.path.basename(path) + "." + str(i))
entities.append(entity)
else:
biounit = cif_info.biounits[0]
entity = biounit.PDBize(tmp_entity, min_polymer_size=0)
if not entity.IsValid():
raise IOError(
"Provided file does not contain valid entity.")
entity.SetName(os.path.basename(path))
entities.append(entity)
except Exception as exc:
raise exc
return entities
def _Main():
"""Do the magic."""
opts = _ParseArgs()
PushVerbosityLevel(opts.verbosity)
#
# Read the input files
ost.LogInfo("Reading model from %s" % opts.model)
models = _ReadStructureFile(opts.model)
ost.LogInfo("Reading reference from %s" % opts.reference)
references = _ReadStructureFile(opts.reference)
if len(models) > 1 or len(references) > 1:
ost.LogInfo(
"Multiple complexes detected. All combinations will be tried.")
result = {"result": []}
#
# Perform scoring
for model in models:
for reference in references:
ost.LogInfo("#\nComparing %s to %s" % (
model.GetName(),
reference.GetName()))
try:
qs_scorer = qsscoring.QSscorer(reference,
model)
if opts.chain_mapping is not None:
ost.LogInfo(
"Using custom chain mapping: %s" % str(
opts.chain_mapping))
qs_scorer.chain_mapping = opts.chain_mapping
result["result"].append({
"status": "SUCCESS",
"error": "",
"model_name": model.GetName(),
"reference_name": reference.GetName(),
"global_score": qs_scorer.global_score,
"oligo_lddt_score": qs_scorer.lddt_score,
"best_score": qs_scorer.best_score,
"chain_mapping": qs_scorer.chain_mapping
})
except qsscoring.QSscoreError as ex:
# default handling: report failure and set score to 0
ost.LogError('QSscore failed:', str(ex))
result["result"].append({
"status": "FAILURE",
"error": str(ex),
"model_name": model.GetName(),
"reference_name": reference.GetName(),
"global_score": 0.0,
"oligo_lddt_score": 0.0,
"best_score": 0.0,
"chain_mapping": None
})
if opts.output is not None:
with open(opts.output, "w") as outfile:
outfile.write(json.dumps(result, indent=4))
if __name__ == '__main__':
# make script 'hot'
unbuffered = os.fdopen(sys.stdout.fileno(), 'w', 0)
sys.stdout = unbuffered
_Main()
......@@ -9,43 +9,97 @@ you can type ``ost <ACTION> -h`` to get a description on its usage.
Here we list the most prominent actions with simple examples.
.. ost-qs-score:
.. ost-compare-structures:
Calculating QS-Score
Comparing two structures
--------------------------------------------------------------------------------
You can calculate quaternary structure score between two complexes
from the command line with
You can compare two structures in terms of quaternary structure score and
lDDT scores between two complexes from the command line with:
.. code-block:: console
$ ost qs-score [-h] [-v VERBOSITY] -m MODEL -r REFERENCE
[-c CHAIN_MAPPING [CHAIN_MAPPING ...]] [-o OUTPUT]
$ ost compare-structures [-h] [-v VERBOSITY] -m MODEL -r REFERENCE
[-o OUTPUT] [-d] [-ds DUMP_SUFFIX]
[-rs REFERENCE_SELECTION] [-ms MODEL_SELECTION]
[-ca] [-ft] [-qs]
[-c CHAIN_MAPPING [CHAIN_MAPPING ...]] [-rna]
[-l] [-sc] [-p PARAMETER_FILE]
[-bt BOND_TOLERANCE] [-at ANGLE_TOLERANCE]
[-ir INCLUSION_RADIUS] [-ss SEQUENCE_SEPARATION]
[-cc] [-spr] [-ml] [-cl COMPOUND_LIBRARY]
[-rm REMOVE [REMOVE ...]] [-ce] [-mn]
By default the verbosity is set to 3 which will result in the informations
being shown in the console. The result can be (optionally) saved as JSON file
which is the preferred way of parsing it as the log output might change in the
future. The output file has following format:
future. Optionally, the local scores for lDDT can also be dumped to the output
file. Additionally, cleaned up structures can be saved to the disk.
The output file has following format:
.. code-block:: python
{
"result": [
{
"status": <SUCCESS or FAILURE>,
"reference_name": <Reference name extracted from the file name>,
"model_name": <Model name extracted from the file name>,
"error": <ERROR message if any>,
"best_score": <Best QS-score>,
"lddt_score": <lDDT score>
"global_score": <Global QS-score>,
"chain_mapping": <Mapping of chains eg. {"A": "B", "B": "A"}>,
"result": {
"<MODEL NAME>": { # Model name extracted from the file name
"<REFERENCE NAME>": { # Reference name extracted from the file name
"info": {
"residue_names_consistent": <Are the residue numbers consistent? true or false>,
"mapping": <Mapping of chains eg. {"A": "B", "B": "A"}>,
"alignments": <list of chain-chain alignments in FASTA format>
}
},
"lddt": { # calculated when --lddt (-l) option is selected
"oligo_lddt": {
"status": <SUCCESS or FAILURE>,
"error": <ERROR message if any>,
"global_score": <calculated oligomeric lddt score>,
},
"weighted_lddt": {
"status": <SUCCESS or FAILURE>,
"error": <ERROR message if any>,
"global_score": <calculated weighted lddt score>,
},
"single_chain_lddt": [ # a list of chain-chain lDDts
{
"status": <SUCCESS or FAILURE>,
"error": <ERROR message if any>,
"reference_chain": <name of the chain in reference>,
"model_chain": <name of the chain in model>
"global_score": <calculated single-chain lddt score>,
"conserved_contacts": <number of conserved contacts between model and reference>,
"total_contacts": <total number of contacts between model and reference>,
"per_residue_scores": [ # per-residue lDDT scores - calculated when --save-per-residue-scores (-spr) option is selected
{
"total_contacts": <total number of contacts between model and reference>,
"residue_name": <three letter code of the residue>,
"lddt": <residue lDDT score>,
"conserved_contacts": <number of conserved contacts between model and reference for given residue>,
"residue_number": <total number of contacts between model and reference for given residue>
},
.
.
.
]
}
]
},
"qs_score": { # calculated when --qs-score (-q) option is selected
"status": <SUCCESS or FAILURE>,
"error": <ERROR message if any>,
"global_score": <Global QS-score>,
"best_score": <Best QS-score>,
}
}
}
]
},
"options": {} # Options used to run the script
}
The "result" filed is a list as eg. in mmCIF file there can be many entities and
the script will compare all combinations.
The "result" filed is a dictionary mapping from model to reference as eg. in
mmCIF file there can be many entities and the script will compare all
combinations.
Example usage:
......@@ -53,13 +107,247 @@ Example usage:
$ curl https://www.cameo3d.org/static/data/modeling/2018.03.03/5X7J_B/bu_target_01.pdb > reference.pdb
$ curl https://www.cameo3d.org/static/data/modeling/2018.03.03/5X7J_B/servers/server11/oligo_model-1/superposed_oligo_model-1.pdb > model.pdb
$ ost qs-score -r reference.pdb -m model.pdb -o qs.json
$ $OST_ROOT/bin/ost compare-structures --model model.pdb --reference reference.pdb --output output.json --qs-score --residue-number-alignment --lddt --structural-checks --consistency-checks --inclusion-radius 15.0 --bond-tolerance 15.0 --angle-tolerance 15.0 --molck --remove oxt hyd unk --clean-element-column --map-nonstandard-residues
################################################################################
Reading input files (fault_tolerant=False)
--> reading model from model.pdb
imported 2 chains, 396 residues, 3106 atoms; with 0 helices and 0 strands
--> reading reference from reference.pdb
imported 3 chains, 408 residues, 3011 atoms; with 0 helices and 0 strands
################################################################################
Cleaning up input with Molck
removing hydrogen atoms
--> removed 0 hydrogen atoms
removing OXT atoms
--> removed 0 OXT atoms
residue A.GLN54 is missing 4 atoms: 'CG', 'CD', 'OE1', 'NE2'
residue A.GLU55 is missing 4 atoms: 'CG', 'CD', 'OE1', 'OE2'
residue A.ARG139 is missing 6 atoms: 'CG', 'CD', 'NE', 'CZ', 'NH1', 'NH2'
residue B.THR53 is missing 1 atom: 'CG2'
residue B.GLN54 is missing 4 atoms: 'CG', 'CD', 'OE1', 'NE2'
residue B.GLU55 is missing 4 atoms: 'CG', 'CD', 'OE1', 'OE2'
residue B.GLU61 is missing 1 atom: 'OE2'
residue B.GLU117 is missing 1 atom: 'O'
residue B.ARG120 is missing 2 atoms: 'NH1', 'NH2'
residue B.ARG142 is missing 2 atoms: 'NH1', 'NH2'
residue B.GLU148 is missing 4 atoms: 'CG', 'CD', 'OE1', 'OE2'
residue B.PRO198 is missing 1 atom: 'O'
_.CL1 is not a standard amino acid
_.CL2 is not a standard amino acid
_.CL3 is not a standard amino acid
_.CL4 is not a standard amino acid
_.CA5 is not a standard amino acid
_.CA6 is not a standard amino acid
_.CA7 is not a standard amino acid
_.CA8 is not a standard amino acid
_.CA9 is not a standard amino acid
_.CL10 is not a standard amino acid
_.CL11 is not a standard amino acid
_.CL12 is not a standard amino acid
_.CL13 is not a standard amino acid
_.CL14 is not a standard amino acid
_.CL15 is not a standard amino acid
_.CA16 is not a standard amino acid
_.CA17 is not a standard amino acid
_.CA18 is not a standard amino acid
_.CA19 is not a standard amino acid
_.CA20 is not a standard amino acid
_.EDO21 is not a standard amino acid
_.EDO22 is not a standard amino acid
_.EDO23 is not a standard amino acid
_.EDO24 is not a standard amino acid
removing hydrogen atoms
--> removed 0 hydrogen atoms
removing OXT atoms
--> removed 0 OXT atoms
################################################################################
Performing structural checks
--> for reference(s)
Checking
Checking stereo-chemistry
Average Z-Score for bond lengths: -nan
Bonds outside of tolerance range: 0 out of 0
Bond Avg Length Avg zscore Num Bonds
Average Z-Score angle widths: 0.00000
Angles outside of tolerance range: 0 out of 1
Filtering non-bonded clashes
0 non-bonded short-range distances shorter than tolerance distance
Distances shorter than tolerance are on average shorter by: 0.00000
--> for model(s)
Checking
Checking stereo-chemistry
Average Z-Score for bond lengths: -nan
Bonds outside of tolerance range: 0 out of 0
Bond Avg Length Avg zscore Num Bonds
Average Z-Score angle widths: 0.00000
Angles outside of tolerance range: 0 out of 1
Filtering non-bonded clashes
0 non-bonded short-range distances shorter than tolerance distance
Distances shorter than tolerance are on average shorter by: 0.00000
################################################################################
Comparing to
Chains removed from : _
Chains in : AB
Chains in : AB
Chemically equivalent chain-groups in pdb_1: [['B', 'A']]
Chemically equivalent chain-groups in pdb_2: [['A', 'B']]
Chemical chain-groups mapping: {('B', 'A'): ('A', 'B')}
Identifying Symmetry Groups...
Symmetry threshold 0.1 used for angles of pdb_1
Symmetry threshold 0.1 used for axis of pdb_1
Symmetry threshold 0.1 used for angles of pdb_2
Symmetry threshold 0.1 used for axis of pdb_2
Selecting Symmetry Groups...
Symmetry-groups used in pdb_1: [('B',), ('A',)]
Symmetry-groups used in pdb_2: [('A',), ('B',)]
Closed Symmetry with strict parameters
Mapping found: {'A': 'B', 'B': 'A'}
--------------------------------------------------------------------------------
Checking consistency between and
Consistency check: OK
--------------------------------------------------------------------------------
Computing QS-score
QSscore pdb_1, pdb_2: best: 0.90, global: 0.90
--------------------------------------------------------------------------------
Computing lDDT scores
lDDT settings:
Inclusion Radius: 15
Sequence separation: 0
Cutoffs: 0.5, 1, 2, 4
Residue properties label: lddt
===
--> Computing lDDT between model chain B and reference chain A
Coverage: 1 (187 out of 187 residues)
Global LDDT score: 0.8257
(877834 conserved distances out of 1063080 checked, over 4 thresholds)
--> Computing lDDT between model chain A and reference chain B
Coverage: 1 (197 out of 197 residues)
Global LDDT score: 0.7854
(904568 conserved distances out of 1151664 checked, over 4 thresholds)
--> Computing oligomeric lDDT score
Reference pdb_1 has: 2 chains
Model pdb_2 has: 2 chains
Coverage: 1 (384 out of 384 residues)
Oligo lDDT score: 0.8025
--> Computing weighted lDDT score
Weighted lDDT score: 0.8048
################################################################################
Saving output into output.json
This reads the model and reference file and calculates QS-score between them.
In the example above the output file looks as follows:
.. code-block:: python
Reading model from model.pdb
{
"result": {
"": {
"": {
"info": {
"residue_names_consistent": true,
"mapping": {
"chain_mapping": {
"A": "B",
"B": "A"
},
"alignments": [
">reference:A\n-PGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSL---QELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVR-------LGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP\n>model:B\nMPGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSLLLTQELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVRRPDRLEGLGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP",
">reference:B\n-PGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSLLLTQELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVRRPDRLEGLGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP\n>model:A\nMPGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSLLLTQELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVRRPDRLEGLGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP"
]
}
},
"lddt": {
"oligo_lddt": {
"status": "SUCCESS",
"global_score": 0.8025223016738892,
"error": ""
},
"weighted_lddt": {
"status": "SUCCESS",
"global_score": 0.804789180710712,
"error": ""
},
"single_chain_lddt": [
{
"status": "SUCCESS",
"global_score": 0.8257459402084351,
"conserved_contacts": 877834,
"reference_chain": "A",
"total_contacts": 1063080,
"error": "",
"model_chain": "B"
},
{
"status": "SUCCESS",
"global_score": 0.7854443788528442,
"conserved_contacts": 904568,
"reference_chain": "B",
"total_contacts": 1151664,
"error": "",
"model_chain": "A"
}
]
},
"qs_score": {
"status": "SUCCESS",
"global_score": 0.8974384796108209,
"best_score": 0.9022811630070536,
"error": ""
}
}
}
},
"options": {
"reference": "reference.pdb",
"structural_checks": true,
"chain_mapping": null,
"bond_tolerance": 15.0,
"parameter_file": "Path to stage/share/openstructure/stereo_chemical_props.txt",
"consistency_checks": true,
"qs_score": true,
"map_nonstandard_residues": true,
"save_per_residue_scores": false,
"fault_tolerant": false,
"reference_selection": "",
"cwd": "CWD",
"inclusion_radius": 15.0,
"angle_tolerance": 15.0,
"c_alpha_only": false,
"clean_element_column": true,
"dump_suffix": ".compare.structures.pdb",
"compound_library": "Path to stage/share/openstructure/compounds.chemlib",
"dump_structures": false,
"residue_number_alignment": true,
"verbosity": 3,
"remove": [
"oxt",
"hyd",
"unk"
],
"molck": true,
"sequence_separation": 0,
"output": "output.json",
"model": "model.pdb",
"lddt": true,
"model_selection": ""
}
}
If only all the structures are clean one can omit all the checking steps and
calculate eg. QS-score directly:
.. code:: console
$OST_ROOT/bin/ost compare-structures --model model.pdb --reference reference.pdb --output output_qs.json --qs-score --residue-number-alignment
################################################################################
Reading input files (fault_tolerant=False)
--> reading model from model.pdb
imported 2 chains, 396 residues, 3106 atoms; with 0 helices and 0 strands
Reading reference from reference.pdb
--> reading reference from reference.pdb
imported 3 chains, 408 residues, 3011 atoms; with 0 helices and 0 strands
#
################################################################################
Comparing model.pdb to reference.pdb
Chains removed from reference.pdb: _
Chains in reference.pdb: AB
......@@ -77,34 +365,12 @@ Example usage:
Symmetry-groups used in model.pdb: [('A',), ('B',)]
Closed Symmetry with strict parameters
Mapping found: {'A': 'B', 'B': 'A'}
QSscore reference.pdb, model.pdb: best: 0.90, global: 0.86
Computing lDDT score
Reference reference.pdb has: 2 chains
Model model.pdb has: 2 chains
Reference reference.pdb has: 384 residues
Model model.pdb has: 396 residues
lDDT score: 0.924
This reads the model and reference file and calculates QS-score between them.
In the example above the output file looks as follows:
.. code-block:: python
{
"result": [
{
"status": "SUCCESS",
"chain_mapping": {
"A": "B",
"B": "A"
},
"global_score": 0.8649009937090414,
"error": "",
"reference_name": "reference.pdb",
"best_score": 0.8979578363524581,
"model_name": "model.pdb",
"lddt_score": 0.924271821975708
}
]
}
--------------------------------------------------------------------------------
Checking consistency between model.pdb and reference.pdb
Consistency check: OK
--------------------------------------------------------------------------------
Computing QS-score
QSscore reference.pdb, model.pdb: best: 0.90, global: 0.90
################################################################################
Saving output into output_qs.json
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment