Skip to content
Snippets Groups Projects
Commit cbf55fea authored by Gerardo Tauriello's avatar Gerardo Tauriello
Browse files

SCHWED-3890: clarify doc and help of ost compare-structures action.

parent 166b0b79
Branches
Tags
No related merge requests found
......@@ -33,38 +33,29 @@ Only model structures are "Molck-ed" in CAMEO. The call to molck is as follows:
--complib=<COMPOUND LIB> \\
--rm=hyd,oxt,unk \\
--fix-ele \\
--map-nonstd <FILEPATH> \\
--out=<OUTPUT>
--map-nonstd \\
--out=<OUTPUT> \\
<FILEPATH>
To be as much compatible with with CAMEO as possible one should call
compare-structures as follows:
ost compare-structures \\
# General parameters
####################
--model <MODEL> \\
--reference <REF> \\
--output output.json \\
# QS-score parameters
#####################
--qs-score \\
--residue-number-alignment \\
# lDDT parameters
#################
--lddt \\
--inclusion-radius 15.0 \\
# Molecular check parameters
############################
--molck \\
--remove oxt hyd unk \\
--clean-element-column \\
--map-nonstandard-residues \\
# Additional checks
###################
--structural-checks \\
--bond-tolerance 15.0 \\
--angle-tolerance 15.0 \\
--consistency-checks
--residue-number-alignment \\
--consistency-checks \\
--qs-score \\
--lddt \\
--inclusion-radius 15.0
"""
import os
......@@ -131,38 +122,49 @@ def _GetDefaultCompoundLibraryPath():
def _ParseArgs():
"""Parse command-line arguments."""
#
# General options
#
parser = argparse.ArgumentParser(
formatter_class=argparse.RawTextHelpFormatter,
description=__doc__,
prog="ost compare-structures")
parser.add_argument(
'-v',
'--verbosity',
type=int,
default=3,
help="Set verbosity level.")
parser.add_argument(
#
# Required arguments
#
group_required = parser.add_argument_group('required arguments')
group_required.add_argument(
"-m",
"--model",
dest="model",
required=True,
help=("Path to the model file."))
parser.add_argument(
group_required.add_argument(
"-r",
"--reference",
dest="reference",
required=True,
help=("Path to the reference file."))
parser.add_argument(
#
# General arguments
#
group_general = parser.add_argument_group('general arguments')
group_general.add_argument(
'-v',
'--verbosity',
type=int,
default=3,
help="Set verbosity level. Defaults to 3.")
group_general.add_argument(
"-o",
"--output",
dest="output",
help=("Output file name. The output will be saved as a JSON file."))
parser.add_argument(
group_general.add_argument(
"-d",
"--dump-structures",
dest="dump_structures",
......@@ -171,26 +173,26 @@ def _ParseArgs():
help=("Dump cleaned structures used to calculate all the scores as\n"
"PDB files using specified suffix. Files will be dumped to the\n"
"same location as original files."))
parser.add_argument(
group_general.add_argument(
"-ds",
"--dump-suffix",
dest="dump_suffix",
default=".compare.structures.pdb",
help=("Use this suffix to dump structures.\n"
"Defaults to .compare.structures.pdb."))
parser.add_argument(
group_general.add_argument(
"-rs",
"--reference-selection",
dest="reference_selection",
default="",
help=("Selection performed on reference structures."))
parser.add_argument(
group_general.add_argument(
"-ms",
"--model-selection",
dest="model_selection",
default="",
help=("Selection performed on model structures."))
parser.add_argument(
group_general.add_argument(
"-ca",
"--c-alpha-only",
dest="c_alpha_only",
......@@ -199,14 +201,14 @@ def _ParseArgs():
help=("Use C-alpha atoms only. Equivalent of calling the action with\n"
"'--model-selection=\"aname=CA\" "
"--reference-selection=\"aname=CA\"'\noptions."))
parser.add_argument(
group_general.add_argument(
"-ft",
"--fault-tolerant",
dest="fault_tolerant",
default=False,
action="store_true",
help=("Fault tolerant parsing."))
parser.add_argument(
group_general.add_argument(
"-cl",
"--compound-library",
dest="compound_library",
......@@ -215,99 +217,21 @@ def _ParseArgs():
"If not provided, the following locations are searched in this\n"
"order: 1. Working directory, 2. OpenStructure standard library"
"\nlocation."))
#
# QS-scorer options
#
parser.add_argument(
"-qs",
"--qs-score",
dest="qs_score",
default=False,
action="store_true",
help=("Calculate QS-score."))
parser.add_argument(
"-c",
"--chain-mapping",
nargs="+",
type=lambda x: x.split(":"),
dest="chain_mapping",
help=("Mapping of chains between the reference and the model.\n"
"Each separate mapping consist of key:value pairs where key\n"
"is the chain name in reference and value is the chain name in\n"
"model."))
parser.add_argument(
"--qs-rmsd",
dest="qs_rmsd",
default=False,
action="store_true",
help=("Calculate CA RMSD between shared CA atoms of mapped chains.\n"
"This uses a superposition using all mapped chains which\n"
"minimizes the CA RMSD."))
parser.add_argument(
"-rna",
"--residue-number-alignment",
dest="residue_number_alignment",
default=False,
action="store_true",
help=("Make alignment based on residue number instead of using\n"
"a global BLOSUM62-based alignment."))
parser.add_argument(
"--qs-max-mappings-extensive",
dest="qs_max_mappings_extensive",
type=int,
default=1000000,
help=("Maximal number of chain mappings to test for 'extensive'\n"
"chain mapping scheme which is used as a last resort if\n"
"other schemes failed. The extensive chain mapping search\n"
"must in the worst case check O(N!) possible mappings for\n"
"complexes with N chains. Two octamers without symmetry\n"
"would require 322560 mappings to be checked. To limit\n"
"computations, no scores are computed if we try more than\n"
"the maximal number of chain mappings."))
#
# lDDT options
#
parser.add_argument(
"-l",
"--lddt",
dest="lddt",
default=False,
action="store_true",
help=("Calculate lDDT."))
parser.add_argument(
"-ir",
"--inclusion-radius",
dest="inclusion_radius",
type=float,
default=15.0,
help=("Distance inclusion radius."))
parser.add_argument(
"-ss",
"--sequence-separation",
dest="sequence_separation",
type=int,
default=0,
help=("Sequence separation. Only distances between residues whose\n"
"separation is higher than the provided parameter are\n"
"considered when computing the score"))
parser.add_argument(
"-spr",
"--save-per-residue-scores",
dest="save_per_residue_scores",
default=False,
action="store_true",
help=(""))
#
# Molecular check parameters
# Molecular check arguments
#
parser.add_argument(
group_molck = parser.add_argument_group('molecular check arguments')
group_molck.add_argument(
"-ml",
"--molck",
dest="molck",
default=False,
action="store_true",
help=("Run molecular checker to clean up input."))
parser.add_argument(
group_molck.add_argument(
"-rm",
"--remove",
dest="remove",
......@@ -321,15 +245,16 @@ def _ParseArgs():
" * nonstd - remove all residues not one of the 20\n"
" * standard amino acids\n"
" * unk - Remove unknown and atoms not following the\n"
" nomenclature"))
parser.add_argument(
" nomenclature\n"
"Defaults to hyd."))
group_molck.add_argument(
"-ce",
"--clean-element-column",
dest="clean_element_column",
default=False,
action="store_true",
help=("Clean up element column"))
parser.add_argument(
group_molck.add_argument(
"-mn",
"--map-nonstandard-residues",
dest="map_nonstandard_residues",
......@@ -337,17 +262,21 @@ def _ParseArgs():
action="store_true",
help=("Map modified residues back to the parent amino acid, for\n"
"example MSE -> MET, SEP -> SER."))
#
# Options for various checks
# Structural check arguments
#
parser.add_argument(
group_sc = parser.add_argument_group('structural check arguments')
group_sc.add_argument(
"-sc",
"--structural-checks",
dest="structural_checks",
default=False,
action="store_true",
help=("Perform structural checks and filter input data."))
parser.add_argument(
group_sc.add_argument(
"-p",
"--parameter-file",
dest="parameter_file",
......@@ -357,21 +286,58 @@ def _ParseArgs():
"If not provided, the following locations are searched in this\n"
"order: 1. Working directory, 2. OpenStructure standard library"
"\nlocation."))
parser.add_argument(
group_sc.add_argument(
"-bt",
"--bond-tolerance",
dest="bond_tolerance",
type=float,
default=12.0,
help=("Tolerance in STD for bonds."))
parser.add_argument(
help=("Tolerance in STD for bonds. Defaults to 12."))
group_sc.add_argument(
"-at",
"--angle-tolerance",
dest="angle_tolerance",
type=float,
default=12.0,
help=("Tolerance in STD for angles."))
parser.add_argument(
help=("Tolerance in STD for angles. Defaults to 12."))
#
# Chain mapping arguments
#
group_cm = parser.add_argument_group('chain mapping arguments')
group_cm.add_argument(
"-c",
"--chain-mapping",
nargs="+",
type=lambda x: x.split(":"),
dest="chain_mapping",
help=("Mapping of chains between the reference and the model.\n"
"Each separate mapping consist of key:value pairs where key\n"
"is the chain name in reference and value is the chain name in\n"
"model."))
group_cm.add_argument(
"--qs-max-mappings-extensive",
dest="qs_max_mappings_extensive",
type=int,
default=1000000,
help=("Maximal number of chain mappings to test for 'extensive'\n"
"chain mapping scheme which is used as a last resort if\n"
"other schemes failed. The extensive chain mapping search\n"
"must in the worst case check O(N!) possible mappings for\n"
"complexes with N chains. Two octamers without symmetry\n"
"would require 322560 mappings to be checked. To limit\n"
"computations, no scores are computed if we try more than\n"
"the maximal number of chain mappings. Defaults to 1000000."))
#
# Sequence alignment arguments
#
group_aln = parser.add_argument_group('sequence alignment arguments')
group_aln.add_argument(
"-cc",
"--consistency-checks",
dest="consistency_checks",
......@@ -383,6 +349,73 @@ def _ParseArgs():
"will continue to calculate scores. If this flag is ON, checks\n"
"will not be ignored and if the pair does not pass the test\n"
"all the scores for that pair will be marked as a FAILURE."))
group_aln.add_argument(
"-rna",
"--residue-number-alignment",
dest="residue_number_alignment",
default=False,
action="store_true",
help=("Make alignment based on residue number instead of using\n"
"a global BLOSUM62-based alignment."))
#
# QS score arguments
#
group_qs = parser.add_argument_group('QS score arguments')
group_qs.add_argument(
"-qs",
"--qs-score",
dest="qs_score",
default=False,
action="store_true",
help=("Calculate QS-score."))
group_qs.add_argument(
"--qs-rmsd",
dest="qs_rmsd",
default=False,
action="store_true",
help=("Calculate CA RMSD between shared CA atoms of mapped chains.\n"
"This uses a superposition using all mapped chains which\n"
"minimizes the CA RMSD."))
#
# lDDT score arguments
#
group_lddt = parser.add_argument_group('lDDT score arguments')
group_lddt.add_argument(
"-l",
"--lddt",
dest="lddt",
default=False,
action="store_true",
help=("Calculate lDDT."))
group_lddt.add_argument(
"-ir",
"--inclusion-radius",
dest="inclusion_radius",
type=float,
default=15.0,
help=("Distance inclusion radius for lDDT. Defaults to 15 A."))
group_lddt.add_argument(
"-ss",
"--sequence-separation",
dest="sequence_separation",
type=int,
default=0,
help=("Sequence separation. Only distances between residues whose\n"
"separation is higher than the provided parameter are\n"
"considered when computing the score. Defaults to 0."))
group_lddt.add_argument(
"-spr",
"--save-per-residue-scores",
dest="save_per_residue_scores",
default=False,
action="store_true",
help=(""))
# Print full help is no arguments provided
if len(sys.argv) == 1:
......@@ -954,7 +987,7 @@ def _Main():
ost.LogInfo("#" * 80)
ost.LogInfo("Saving output into %s" % opts.output)
with open(opts.output, "w") as outfile:
outfile.write(json.dumps(result, indent=4))
json.dump(result, outfile, indent=4, sort_keys=True)
if __name__ == '__main__':
......
.. Note on large code blocks: keep max. width to 120 or it will look bad
on webpage!
.. TODO: look at argparse directive to autogenerate --help output!
.. ost-actions:
OST Actions
......@@ -15,22 +19,220 @@ Comparing two structures
--------------------------------------------------------------------------------
You can compare two structures in terms of quaternary structure score and
lDDT scores between two complexes from the command line with:
lDDT scores between two complexes from the command line with the
``ost compare-structures`` action.
In summary it performs the following steps:
- Read structures (PDB or mmCIF format, can be gzipped) and split into
biological assemblies (all possible pairs are scored).
- Optional cleanup of structures with :func:`~ost.mol.alg.Molck`.
- Optional structural checks with :func:`~ost.mol.alg.CheckStructure`.
- Unless user-provided, find chain mapping between complexes (see
:attr:`here <ost.mol.alg.qsscoring.QSscorer.chain_mapping>` for details)
- Perform sequence alignment of chain pairs (unless user asks for alignment
based on residue numbers). Alignment can optionally checked for consistency
if 100% sequence identity is expected.
- Compute scores requested by user (CA-RMSD of oligomer,
:mod:`QS scores of oligomer <ost.mol.alg.qsscoring>`,
:class:`single chain lDDT scores <ost.mol.alg.lDDTScorer>`,
:attr:`weighted average of single chain lDDT scores <ost.mol.alg.qsscoring.OligoLDDTScorer.weighted_lddt>`,
:attr:`lDDT score of oligomer <ost.mol.alg.qsscoring.OligoLDDTScorer.oligo_lddt>`).
.. note ::
The action relies on OST's :mod:`~ost.mol.alg.qsscoring` module and has the
same requirements on your OST installation (needs compound library, ClustalW,
numpy and scipy).
Details on the usage (output of ``ost compare-structures --help``):
.. code-block:: console
$ ost compare-structures [-h] [-v VERBOSITY] -m MODEL -r REFERENCE
usage: ost compare-structures [-h] -m MODEL -r REFERENCE [-v VERBOSITY]
[-o OUTPUT] [-d] [-ds DUMP_SUFFIX]
[-rs REFERENCE_SELECTION] [-ms MODEL_SELECTION]
[-ca] [-ft] [-cl COMPOUND_LIBRARY] [-qs]
[-c CHAIN_MAPPING [CHAIN_MAPPING ...]]
[--qs-rmsd] [-rna]
[--qs-max-mappings-extensive QS_MAX_MAPPINGS_EXTENSIVE]
[-l] [-ir INCLUSION_RADIUS]
[-ss SEQUENCE_SEPARATION] [-spr] [-ml]
[-ca] [-ft] [-cl COMPOUND_LIBRARY] [-ml]
[-rm REMOVE [REMOVE ...]] [-ce] [-mn] [-sc]
[-p PARAMETER_FILE] [-bt BOND_TOLERANCE]
[-at ANGLE_TOLERANCE] [-cc]
[-at ANGLE_TOLERANCE]
[-c CHAIN_MAPPING [CHAIN_MAPPING ...]]
[--qs-max-mappings-extensive QS_MAX_MAPPINGS_EXTENSIVE]
[-cc] [-rna] [-qs] [--qs-rmsd] [-l]
[-ir INCLUSION_RADIUS] [-ss SEQUENCE_SEPARATION]
[-spr]
Evaluate model structure against reference.
eg.
ost compare-structures \
--model <MODEL> \
--reference <REF> \
--output output.json \
--lddt \
--structural-checks \
--consistency-checks \
--molck \
--remove oxt hyd \
--map-nonstandard-residues
Here we describe how the parameters can be set to mimick a CAMEO evaluation
(as of August 2018).
CAMEO calls the lddt binary as follows:
lddt \
-p <PARAMETER FILE> \
-f \
-a 15 \
-b 15 \
-r 15 \
<MODEL> \
<REF>
Only model structures are "Molck-ed" in CAMEO. The call to molck is as follows:
molck \
--complib=<COMPOUND LIB> \
--rm=hyd,oxt,unk \
--fix-ele \
--map-nonstd \
--out=<OUTPUT> \
<FILEPATH>
To be as much compatible with with CAMEO as possible one should call
compare-structures as follows:
ost compare-structures \
--model <MODEL> \
--reference <REF> \
--output output.json \
--molck \
--remove oxt hyd unk \
--clean-element-column \
--map-nonstandard-residues \
--structural-checks \
--bond-tolerance 15.0 \
--angle-tolerance 15.0 \
--residue-number-alignment \
--consistency-checks \
--qs-score \
--lddt \
--inclusion-radius 15.0
optional arguments:
-h, --help show this help message and exit
required arguments:
-m MODEL, --model MODEL
Path to the model file.
-r REFERENCE, --reference REFERENCE
Path to the reference file.
general arguments:
-v VERBOSITY, --verbosity VERBOSITY
Set verbosity level. Defaults to 3.
-o OUTPUT, --output OUTPUT
Output file name. The output will be saved as a JSON file.
-d, --dump-structures
Dump cleaned structures used to calculate all the scores as
PDB files using specified suffix. Files will be dumped to the
same location as original files.
-ds DUMP_SUFFIX, --dump-suffix DUMP_SUFFIX
Use this suffix to dump structures.
Defaults to .compare.structures.pdb.
-rs REFERENCE_SELECTION, --reference-selection REFERENCE_SELECTION
Selection performed on reference structures.
-ms MODEL_SELECTION, --model-selection MODEL_SELECTION
Selection performed on model structures.
-ca, --c-alpha-only Use C-alpha atoms only. Equivalent of calling the action with
'--model-selection="aname=CA" --reference-selection="aname=CA"'
options.
-ft, --fault-tolerant
Fault tolerant parsing.
-cl COMPOUND_LIBRARY, --compound-library COMPOUND_LIBRARY
Location of the compound library file (compounds.chemlib).
If not provided, the following locations are searched in this
order: 1. Working directory, 2. OpenStructure standard library
location.
molecular check arguments:
-ml, --molck Run molecular checker to clean up input.
-rm REMOVE [REMOVE ...], --remove REMOVE [REMOVE ...]
Remove atoms and residues matching some criteria:
* zeroocc - Remove atoms with zero occupancy
* hyd - remove hydrogen atoms
* oxt - remove terminal oxygens
* nonstd - remove all residues not one of the 20
* standard amino acids
* unk - Remove unknown and atoms not following the
nomenclature
Defaults to hyd.
-ce, --clean-element-column
Clean up element column
-mn, --map-nonstandard-residues
Map modified residues back to the parent amino acid, for
example MSE -> MET, SEP -> SER.
structural check arguments:
-sc, --structural-checks
Perform structural checks and filter input data.
-p PARAMETER_FILE, --parameter-file PARAMETER_FILE
Location of the stereochemical parameter file
(stereo_chemical_props.txt).
If not provided, the following locations are searched in this
order: 1. Working directory, 2. OpenStructure standard library
location.
-bt BOND_TOLERANCE, --bond-tolerance BOND_TOLERANCE
Tolerance in STD for bonds. Defaults to 12.
-at ANGLE_TOLERANCE, --angle-tolerance ANGLE_TOLERANCE
Tolerance in STD for angles. Defaults to 12.
chain mapping arguments:
-c CHAIN_MAPPING [CHAIN_MAPPING ...], --chain-mapping CHAIN_MAPPING [CHAIN_MAPPING ...]
Mapping of chains between the reference and the model.
Each separate mapping consist of key:value pairs where key
is the chain name in reference and value is the chain name in
model.
--qs-max-mappings-extensive QS_MAX_MAPPINGS_EXTENSIVE
Maximal number of chain mappings to test for 'extensive'
chain mapping scheme which is used as a last resort if
other schemes failed. The extensive chain mapping search
must in the worst case check O(N!) possible mappings for
complexes with N chains. Two octamers without symmetry
would require 322560 mappings to be checked. To limit
computations, no scores are computed if we try more than
the maximal number of chain mappings. Defaults to 1000000.
sequence alignment arguments:
-cc, --consistency-checks
Take consistency checks into account. By default residue name
consistency between a model-reference pair would be checked
but only a warning message will be displayed and the script
will continue to calculate scores. If this flag is ON, checks
will not be ignored and if the pair does not pass the test
all the scores for that pair will be marked as a FAILURE.
-rna, --residue-number-alignment
Make alignment based on residue number instead of using
a global BLOSUM62-based alignment.
QS score arguments:
-qs, --qs-score Calculate QS-score.
--qs-rmsd Calculate CA RMSD between shared CA atoms of mapped chains.
This uses a superposition using all mapped chains which
minimizes the CA RMSD.
lDDT score arguments:
-l, --lddt Calculate lDDT.
-ir INCLUSION_RADIUS, --inclusion-radius INCLUSION_RADIUS
Distance inclusion radius for lDDT. Defaults to 15 A.
-ss SEQUENCE_SEPARATION, --sequence-separation SEQUENCE_SEPARATION
Sequence separation. Only distances between residues whose
separation is higher than the provided parameter are
considered when computing the score. Defaults to 0.
-spr, --save-per-residue-scores
By default the verbosity is set to 3 which will result in the informations
being shown in the console. The result can be (optionally) saved as JSON file
......@@ -49,37 +251,42 @@ The output file has following format:
"residue_names_consistent": <Are the residue numbers consistent? true or false>,
"mapping": {
"chain_mapping": <Mapping of chains eg. {"A": "B", "B": "A"}>,
"chain_mapping_scheme": <Scheme used to get mapping, check mapping manually if "permissive" or "extensive">,
"chain_mapping_scheme": <Scheme used to get mapping, check mapping manually
if "permissive" or "extensive">,
"alignments": <list of chain-chain alignments in FASTA format>
}
},
"lddt": { # calculated when --lddt (-l) option is selected
"lddt": {
# calculated when --lddt (-l) option is selected
"oligo_lddt": {
"status": <SUCCESS or FAILURE>,
"error": <ERROR message if any>,
"global_score": <calculated oligomeric lddt score>,
"global_score": <calculated oligomeric lDDT score>
},
"weighted_lddt": {
"status": <SUCCESS or FAILURE>,
"error": <ERROR message if any>,
"global_score": <calculated weighted lddt score>,
"global_score": <calculated weighted lDDT score>
},
"single_chain_lddt": [ # a list of chain-chain lDDts
"single_chain_lddt": [
# a list of chain-chain lDDTs
{
"status": <SUCCESS or FAILURE>,
"error": <ERROR message if any>,
"reference_chain": <name of the chain in reference>,
"model_chain": <name of the chain in model>
"global_score": <calculated single-chain lddt score>,
"global_score": <calculated single-chain lDDT score>,
"conserved_contacts": <number of conserved contacts between model and reference>,
"total_contacts": <total number of contacts between model and reference>,
"per_residue_scores": [ # per-residue lDDT scores - calculated when --save-per-residue-scores (-spr) option is selected
"total_contacts": <total number of contacts in reference>,
"per_residue_scores": [
# per-residue lDDT scores
# only calculated when --save-per-residue-scores (-spr) option is selected
{
"total_contacts": <total number of contacts between model and reference>,
"residue_name": <three letter code of the residue in reference chain>,
"residue_number": <residue number in reference chain>,
"lddt": <residue lDDT score>,
"conserved_contacts": <number of conserved contacts between model and reference for given residue>,
"residue_number": <residue number in reference chain>
"conserved_contacts": <conserved_contacts for given residue>,
"total_contacts": <total_contacts for given residue>
},
.
.
......@@ -88,13 +295,13 @@ The output file has following format:
}
]
},
"qs_score": { # calculated when --qs-score (-q) option is selected
"qs_score": {
# calculated when --qs-score (-q) option is selected
"status": <SUCCESS or FAILURE>,
"error": <ERROR message if any>,
"global_score": <Global QS-score>,
"best_score": <Best QS-score>,
"best_score": <Best QS-score>
}
}
}
},
......@@ -109,8 +316,9 @@ Example usage:
.. code-block:: console
$ curl https://www.cameo3d.org/static/data/modeling/2018.03.03/5X7J_B/bu_target_01.pdb > reference.pdb
$ curl https://www.cameo3d.org/static/data/modeling/2018.03.03/5X7J_B/servers/server11/oligo_model-1/superposed_oligo_model-1.pdb > model.pdb
$ CAMEO_TARGET_URL=https://www.cameo3d.org/static/data/modeling/2018.03.03/5X7J_B
$ curl $CAMEO_TARGET_URL/bu_target_01.pdb > reference.pdb
$ curl $CAMEO_TARGET_URL/servers/server11/oligo_model-1/superposed_oligo_model-1.pdb > model.pdb
$ $OST_ROOT/bin/ost compare-structures \
--model model.pdb --reference reference.pdb --output output.json \
--qs-score --residue-number-alignment --lddt --structural-checks \
......@@ -250,114 +458,133 @@ Example usage:
################################################################################
Saving output into output.json
This reads the model and reference file and calculates QS- and lDDT-scores
between them. In the example above the output file looks as follows (FASTA
alignments were cut in display here for readability):
This reads the model and reference file and calculates QS-score between them.
In the example above the output file looks as follows:
.. code snippet to fix output.json generated above
import json
json_data = json.load(open("output.json"))
mapping = json_data["result"]["model.pdb"]["reference.pdb"]["info"]["mapping"]
new_alns = list()
for aln in mapping["alignments"]:
aln_lines = aln.splitlines()
aln_lines[1] = aln_lines[1][:20] + "..."
aln_lines[3] = aln_lines[3][:20] + "..."
new_alns.append("\n".join(aln_lines))
mapping["alignments"] = new_alns
json_data["options"]["parameter_file"] = "Path to stage/share/openstructure/stereo_chemical_props.txt"
json_data["options"]["compound_library"] = "Path to stage/share/openstructure/compounds.chemlib"
with open("output_fixed.json", "w") as outfile:
json.dump(json_data, outfile, indent=4, sort_keys=True)
.. code-block:: python
.. code-block:: json
{
"options": {
"angle_tolerance": 15.0,
"bond_tolerance": 15.0,
"c_alpha_only": false,
"chain_mapping": null,
"clean_element_column": true,
"compound_library": "Path to stage/share/openstructure/compounds.chemlib",
"consistency_checks": true,
"cwd": "/home/taurielg/GT/Code/ost/build",
"dump_structures": false,
"dump_suffix": ".compare.structures.pdb",
"fault_tolerant": false,
"inclusion_radius": 15.0,
"lddt": true,
"map_nonstandard_residues": true,
"model": "model.pdb",
"model_selection": "",
"molck": true,
"output": "output.json",
"parameter_file": "Path to stage/share/openstructure/stereo_chemical_props.txt",
"qs_max_mappings_extensive": 1000000,
"qs_rmsd": false,
"qs_score": true,
"reference": "reference.pdb",
"reference_selection": "",
"remove": [
"oxt",
"hyd",
"unk"
],
"residue_number_alignment": true,
"save_per_residue_scores": false,
"sequence_separation": 0,
"structural_checks": true,
"verbosity": 3
},
"result": {
"model.pdb": {
"reference.pdb": {
"info": {
"residue_names_consistent": true,
"mapping": {
"alignments": [
">reference:A\n-PGLFLTLEGLDGSGKTTQA...\n>model:B\nMPGLFLTLEGLDGSGKTTQA...",
">reference:B\n-PGLFLTLEGLDGSGKTTQA...\n>model:A\nMPGLFLTLEGLDGSGKTTQA..."
],
"chain_mapping": {
"A": "B",
"B": "A"
},
"chain_mapping_scheme": "strict",
"alignments": [
">reference:A\n-PGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSL---QELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVR-------LGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP\n>model:B\nMPGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSLLLTQELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVRRPDRLEGLGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP",
">reference:B\n-PGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSLLLTQELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVRRPDRLEGLGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP\n>model:A\nMPGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSLLLTQELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVRRPDRLEGLGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP"
]
}
"chain_mapping_scheme": "strict"
},
"residue_names_consistent": true
},
"lddt": {
"oligo_lddt": {
"status": "SUCCESS",
"error": "",
"global_score": 0.8025223275721413,
"error": ""
},
"weighted_lddt": {
"status": "SUCCESS",
"global_score": 0.804789180710712,
"error": ""
"status": "SUCCESS"
},
"single_chain_lddt": [
{
"status": "SUCCESS",
"global_score": 0.8257459402084351,
"conserved_contacts": 877834,
"reference_chain": "A",
"total_contacts": 1063080,
"error": "",
"model_chain": "B"
"global_score": 0.8257459402084351,
"model_chain": "B",
"reference_chain": "A",
"status": "SUCCESS",
"total_contacts": 1063080
},
{
"status": "SUCCESS",
"global_score": 0.7854443788528442,
"conserved_contacts": 904568,
"error": "",
"global_score": 0.7854443788528442,
"model_chain": "A",
"reference_chain": "B",
"total_contacts": 1151664,
"status": "SUCCESS",
"total_contacts": 1151664
}
],
"weighted_lddt": {
"error": "",
"model_chain": "A"
"global_score": 0.804789180710712,
"status": "SUCCESS"
}
]
},
"qs_score": {
"status": "SUCCESS",
"global_score": 0.8974384796108209,
"best_score": 0.9022811630070536,
"error": ""
"error": "",
"global_score": 0.8974384796108209,
"status": "SUCCESS"
}
}
}
},
"options": {
"reference": "reference.pdb",
"structural_checks": true,
"chain_mapping": null,
"bond_tolerance": 15.0,
"parameter_file": "Path to stage/share/openstructure/stereo_chemical_props.txt",
"consistency_checks": true,
"qs_score": true,
"map_nonstandard_residues": true,
"save_per_residue_scores": false,
"fault_tolerant": false,
"reference_selection": "",
"qs_rmsd": false,
"cwd": "CWD",
"inclusion_radius": 15.0,
"angle_tolerance": 15.0,
"c_alpha_only": false,
"clean_element_column": true,
"dump_suffix": ".compare.structures.pdb",
"compound_library": "Path to stage/share/openstructure/compounds.chemlib",
"dump_structures": false,
"residue_number_alignment": true,
"verbosity": 3,
"remove": [
"oxt",
"hyd",
"unk"
],
"molck": true,
"sequence_separation": 0,
"output": "output.json",
"model": "model.pdb",
"lddt": true,
"model_selection": ""
}
}
If all the structures are clean one can omit all the checking steps and
calculate eg. QS-score directly:
If all the structures are clean and have matching residue numbers, one can omit
all the checking steps and calculate scores directly as here:
.. code:: console
$ $OST_ROOT/bin/ost compare-structures --model model.pdb --reference reference.pdb --output output_qs.json --qs-score --residue-number-alignment
$ $OST_ROOT/bin/ost compare-structures \
--model model.pdb --reference reference.pdb --output output_qs.json \
--qs-score --residue-number-alignment
################################################################################
Reading input files (fault_tolerant=False)
......@@ -390,4 +617,3 @@ calculate eg. QS-score directly:
QSscore reference.pdb, model.pdb: best: 0.90, global: 0.90
################################################################################
Saving output into output_qs.json
......@@ -549,7 +549,8 @@ Local Distance Test scores (lDDT, DRMSD)
.. class:: lDDTScorer(reference, model, settings)
Object to compute lDDT scores.
Object to compute lDDT scores using :func:`LocalDistDiffTest` as in
`Mariani et al. <https://dx.doi.org/10.1093/bioinformatics/btt473>`_.
Example usage.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment