SCHWED-3890: clarify doc and help of ost compare-structures action.

cbf55fea · Gerardo Tauriello · 166b0b79 · cbf55fea · cbf55fea · cbf55fea
Commit cbf55fea authored Nov 22, 2018 by Gerardo Tauriello
--- a/actions/ost-compare-structures
+++ b/actions/ost-compare-structures
@@ -33,38 +33,29 @@ Only model structures are "Molck-ed" in CAMEO. The call to molck is as follows:
      --complib=<COMPOUND LIB> \\
      --rm=hyd,oxt,unk \\
      --fix-ele \\
-      --map-nonstd <FILEPATH> \\
-      --out=<OUTPUT>
+      --map-nonstd \\
+      --out=<OUTPUT> \\
+      <FILEPATH>

 To be as much compatible with with CAMEO as possible one should call
 compare-structures as follows:

  ost compare-structures \\
-      # General parameters
-      ####################
      --model <MODEL> \\
      --reference <REF> \\
      --output output.json \\
-      # QS-score parameters
-      #####################
-      --qs-score \\
-      --residue-number-alignment \\
-      # lDDT parameters
-      #################
-      --lddt \\
-      --inclusion-radius 15.0 \\
-      # Molecular check parameters
-      ############################
      --molck \\
      --remove oxt hyd unk \\
      --clean-element-column \\
      --map-nonstandard-residues \\
-      # Additional checks
-      ###################
      --structural-checks \\
      --bond-tolerance 15.0 \\
      --angle-tolerance 15.0 \\
-      --consistency-checks
+      --residue-number-alignment \\
+      --consistency-checks \\
+      --qs-score \\
+      --lddt \\
+      --inclusion-radius 15.0
 """

 import os
@@ -131,38 +122,49 @@ def _GetDefaultCompoundLibraryPath():

 def _ParseArgs():
    """Parse command-line arguments."""
-    #
-    # General options
-    #
+
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description=__doc__,
        prog="ost compare-structures")

-    parser.add_argument(
-        '-v',
-        '--verbosity',
-        type=int,
-        default=3,
-        help="Set verbosity level.")
-    parser.add_argument(
+    #
+    # Required arguments
+    #
+
+    group_required = parser.add_argument_group('required arguments')
+
+    group_required.add_argument(
        "-m",
        "--model",
        dest="model",
        required=True,
        help=("Path to the model file."))
-    parser.add_argument(
+    group_required.add_argument(
        "-r",
        "--reference",
        dest="reference",
        required=True,
        help=("Path to the reference file."))
-    parser.add_argument(
+
+    #
+    # General arguments
+    #
+
+    group_general = parser.add_argument_group('general arguments')
+
+    group_general.add_argument(
+        '-v',
+        '--verbosity',
+        type=int,
+        default=3,
+        help="Set verbosity level. Defaults to 3.")
+    group_general.add_argument(
        "-o",
        "--output",
        dest="output",
        help=("Output file name. The output will be saved as a JSON file."))
-    parser.add_argument(
+    group_general.add_argument(
        "-d",
        "--dump-structures",
        dest="dump_structures",
@@ -171,26 +173,26 @@ def _ParseArgs():
        help=("Dump cleaned structures used to calculate all the scores as\n"
              "PDB files using specified suffix. Files will be dumped to the\n"
              "same location as original files."))
-    parser.add_argument(
+    group_general.add_argument(
        "-ds",
        "--dump-suffix",
        dest="dump_suffix",
        default=".compare.structures.pdb",
        help=("Use this suffix to dump structures.\n"
              "Defaults to .compare.structures.pdb."))
-    parser.add_argument(
+    group_general.add_argument(
        "-rs",
        "--reference-selection",
        dest="reference_selection",
        default="",
        help=("Selection performed on reference structures."))
-    parser.add_argument(
+    group_general.add_argument(
        "-ms",
        "--model-selection",
        dest="model_selection",
        default="",
        help=("Selection performed on model structures."))
-    parser.add_argument(
+    group_general.add_argument(
        "-ca",
        "--c-alpha-only",
        dest="c_alpha_only",
@@ -199,14 +201,14 @@ def _ParseArgs():
        help=("Use C-alpha atoms only. Equivalent of calling the action with\n"
              "'--model-selection=\"aname=CA\" "
              "--reference-selection=\"aname=CA\"'\noptions."))
-    parser.add_argument(
+    group_general.add_argument(
        "-ft",
        "--fault-tolerant",
        dest="fault_tolerant",
        default=False,
        action="store_true",
        help=("Fault tolerant parsing."))
-    parser.add_argument(
+    group_general.add_argument(
        "-cl",
        "--compound-library",
        dest="compound_library",
@@ -215,99 +217,21 @@ def _ParseArgs():
              "If not provided, the following locations are searched in this\n"
              "order: 1. Working directory, 2. OpenStructure standard library"
              "\nlocation."))
+
    #
-    # QS-scorer options
-    #
-    parser.add_argument(
-        "-qs",
-        "--qs-score",
-        dest="qs_score",
-        default=False,
-        action="store_true",
-        help=("Calculate QS-score."))
-    parser.add_argument(
-        "-c",
-        "--chain-mapping",
-        nargs="+",
-        type=lambda x: x.split(":"),
-        dest="chain_mapping",
-        help=("Mapping of chains between the reference and the model.\n"
-              "Each separate mapping consist of key:value pairs where key\n"
-              "is the chain name in reference and value is the chain name in\n"
-              "model."))
-    parser.add_argument(
-        "--qs-rmsd",
-        dest="qs_rmsd",
-        default=False,
-        action="store_true",
-        help=("Calculate CA RMSD between shared CA atoms of mapped chains.\n"
-              "This uses a superposition using all mapped chains which\n"
-              "minimizes the CA RMSD."))
-    parser.add_argument(
-        "-rna",
-        "--residue-number-alignment",
-        dest="residue_number_alignment",
-        default=False,
-        action="store_true",
-        help=("Make alignment based on residue number instead of using\n"
-              "a global BLOSUM62-based alignment."))
-    parser.add_argument(
-        "--qs-max-mappings-extensive",
-        dest="qs_max_mappings_extensive",
-        type=int,
-        default=1000000,
-        help=("Maximal number of chain mappings to test for 'extensive'\n"
-              "chain mapping scheme which is used as a last resort if\n"
-              "other schemes failed. The extensive chain mapping search\n"
-              "must in the worst case check O(N!) possible mappings for\n"
-              "complexes with N chains. Two octamers without symmetry\n"
-              "would require 322560 mappings to be checked. To limit\n"
-              "computations, no scores are computed if we try more than\n"
-              "the maximal number of chain mappings."))
-    #
-    # lDDT options
-    #
-    parser.add_argument(
-        "-l",
-        "--lddt",
-        dest="lddt",
-        default=False,
-        action="store_true",
-        help=("Calculate lDDT."))
-    parser.add_argument(
-        "-ir",
-        "--inclusion-radius",
-        dest="inclusion_radius",
-        type=float,
-        default=15.0,
-        help=("Distance inclusion radius."))
-    parser.add_argument(
-        "-ss",
-        "--sequence-separation",
-        dest="sequence_separation",
-        type=int,
-        default=0,
-        help=("Sequence separation. Only distances between residues whose\n"
-              "separation is higher than the provided parameter are\n"
-              "considered when computing the score"))
-    parser.add_argument(
-        "-spr",
-        "--save-per-residue-scores",
-        dest="save_per_residue_scores",
-        default=False,
-        action="store_true",
-        help=(""))
-    #
-    # Molecular check parameters
+    # Molecular check arguments
    #
-    parser.add_argument(
+
+    group_molck = parser.add_argument_group('molecular check arguments')
+
+    group_molck.add_argument(
        "-ml",
        "--molck",
        dest="molck",
        default=False,
        action="store_true",
        help=("Run molecular checker to clean up input."))
-    parser.add_argument(
+    group_molck.add_argument(
        "-rm",
        "--remove",
        dest="remove",
@@ -321,15 +245,16 @@ def _ParseArgs():
              " * nonstd - remove all residues not one of the 20\n"
              " * standard amino acids\n"
              " * unk - Remove unknown and atoms not following the\n"
-              "         nomenclature"))
-    parser.add_argument(
+              "         nomenclature\n"
+              "Defaults to hyd."))
+    group_molck.add_argument(
        "-ce",
        "--clean-element-column",
        dest="clean_element_column",
        default=False,
        action="store_true",
        help=("Clean up element column"))
-    parser.add_argument(
+    group_molck.add_argument(
        "-mn",
        "--map-nonstandard-residues",
        dest="map_nonstandard_residues",
@@ -337,17 +262,21 @@ def _ParseArgs():
        action="store_true",
        help=("Map modified residues back to the parent amino acid, for\n"
              "example MSE -> MET, SEP -> SER."))
+    
    #
-    # Options for various checks
+    # Structural check arguments
    #
-    parser.add_argument(
+
+    group_sc = parser.add_argument_group('structural check arguments')
+
+    group_sc.add_argument(
        "-sc",
        "--structural-checks",
        dest="structural_checks",
        default=False,
        action="store_true",
        help=("Perform structural checks and filter input data."))
-    parser.add_argument(
+    group_sc.add_argument(
        "-p",
        "--parameter-file",
        dest="parameter_file",
@@ -357,21 +286,58 @@ def _ParseArgs():
              "If not provided, the following locations are searched in this\n"
              "order: 1. Working directory, 2. OpenStructure standard library"
              "\nlocation."))
-    parser.add_argument(
+    group_sc.add_argument(
        "-bt",
        "--bond-tolerance",
        dest="bond_tolerance",
        type=float,
        default=12.0,
-        help=("Tolerance in STD for bonds."))
-    parser.add_argument(
+        help=("Tolerance in STD for bonds. Defaults to 12."))
+    group_sc.add_argument(
        "-at",
        "--angle-tolerance",
        dest="angle_tolerance",
        type=float,
        default=12.0,
-        help=("Tolerance in STD for angles."))
-    parser.add_argument(
+        help=("Tolerance in STD for angles. Defaults to 12."))
+
+    #
+    # Chain mapping arguments
+    #
+
+    group_cm = parser.add_argument_group('chain mapping arguments')
+
+    group_cm.add_argument(
+        "-c",
+        "--chain-mapping",
+        nargs="+",
+        type=lambda x: x.split(":"),
+        dest="chain_mapping",
+        help=("Mapping of chains between the reference and the model.\n"
+              "Each separate mapping consist of key:value pairs where key\n"
+              "is the chain name in reference and value is the chain name in\n"
+              "model."))
+    group_cm.add_argument(
+        "--qs-max-mappings-extensive",
+        dest="qs_max_mappings_extensive",
+        type=int,
+        default=1000000,
+        help=("Maximal number of chain mappings to test for 'extensive'\n"
+              "chain mapping scheme which is used as a last resort if\n"
+              "other schemes failed. The extensive chain mapping search\n"
+              "must in the worst case check O(N!) possible mappings for\n"
+              "complexes with N chains. Two octamers without symmetry\n"
+              "would require 322560 mappings to be checked. To limit\n"
+              "computations, no scores are computed if we try more than\n"
+              "the maximal number of chain mappings. Defaults to 1000000."))
+
+    #
+    # Sequence alignment arguments
+    #
+
+    group_aln = parser.add_argument_group('sequence alignment arguments')
+
+    group_aln.add_argument(
        "-cc",
        "--consistency-checks",
        dest="consistency_checks",
@@ -383,6 +349,73 @@ def _ParseArgs():
              "will continue to calculate scores. If this flag is ON, checks\n"
              "will not be ignored and if the pair does not pass the test\n"
              "all the scores for that pair will be marked as a FAILURE."))
+    group_aln.add_argument(
+        "-rna",
+        "--residue-number-alignment",
+        dest="residue_number_alignment",
+        default=False,
+        action="store_true",
+        help=("Make alignment based on residue number instead of using\n"
+              "a global BLOSUM62-based alignment."))
+
+    #
+    # QS score arguments
+    #
+
+    group_qs = parser.add_argument_group('QS score arguments')
+
+    group_qs.add_argument(
+        "-qs",
+        "--qs-score",
+        dest="qs_score",
+        default=False,
+        action="store_true",
+        help=("Calculate QS-score."))
+    group_qs.add_argument(
+        "--qs-rmsd",
+        dest="qs_rmsd",
+        default=False,
+        action="store_true",
+        help=("Calculate CA RMSD between shared CA atoms of mapped chains.\n"
+              "This uses a superposition using all mapped chains which\n"
+              "minimizes the CA RMSD."))
+
+    #
+    # lDDT score arguments
+    #
+
+    group_lddt = parser.add_argument_group('lDDT score arguments')
+
+    group_lddt.add_argument(
+        "-l",
+        "--lddt",
+        dest="lddt",
+        default=False,
+        action="store_true",
+        help=("Calculate lDDT."))
+    group_lddt.add_argument(
+        "-ir",
+        "--inclusion-radius",
+        dest="inclusion_radius",
+        type=float,
+        default=15.0,
+        help=("Distance inclusion radius for lDDT. Defaults to 15 A."))
+    group_lddt.add_argument(
+        "-ss",
+        "--sequence-separation",
+        dest="sequence_separation",
+        type=int,
+        default=0,
+        help=("Sequence separation. Only distances between residues whose\n"
+              "separation is higher than the provided parameter are\n"
+              "considered when computing the score. Defaults to 0."))
+    group_lddt.add_argument(
+        "-spr",
+        "--save-per-residue-scores",
+        dest="save_per_residue_scores",
+        default=False,
+        action="store_true",
+        help=(""))

    # Print full help is no arguments provided
    if len(sys.argv) == 1:
@@ -954,7 +987,7 @@ def _Main():
        ost.LogInfo("#" * 80)
        ost.LogInfo("Saving output into %s" % opts.output)
        with open(opts.output, "w") as outfile:
-            outfile.write(json.dumps(result, indent=4))
+            json.dump(result, outfile, indent=4, sort_keys=True)


 if __name__ == '__main__':

--- a/modules/doc/actions.rst
+++ b/modules/doc/actions.rst
+..  Note on large code blocks: keep max. width to 120 or it will look bad
+                               on webpage!
+..  TODO: look at argparse directive to autogenerate --help output!
+
 .. ost-actions:

 OST Actions
@@ -15,22 +19,220 @@ Comparing two structures
 --------------------------------------------------------------------------------

 You can compare two structures in terms of quaternary structure score and
-lDDT scores between two complexes from the command line with:
+lDDT scores between two complexes from the command line with the
+``ost compare-structures`` action.
+
+In summary it performs the following steps:
+
+- Read structures (PDB or mmCIF format, can be gzipped) and split into
+  biological assemblies (all possible pairs are scored).
+- Optional cleanup of structures with :func:`~ost.mol.alg.Molck`.
+- Optional structural checks with :func:`~ost.mol.alg.CheckStructure`.
+- Unless user-provided, find chain mapping between complexes (see
+  :attr:`here <ost.mol.alg.qsscoring.QSscorer.chain_mapping>` for details)
+- Perform sequence alignment of chain pairs (unless user asks for alignment
+  based on residue numbers). Alignment can optionally checked for consistency
+  if 100% sequence identity is expected.
+- Compute scores requested by user (CA-RMSD of oligomer,
+  :mod:`QS scores of oligomer <ost.mol.alg.qsscoring>`,
+  :class:`single chain lDDT scores <ost.mol.alg.lDDTScorer>`,
+  :attr:`weighted average of single chain lDDT scores <ost.mol.alg.qsscoring.OligoLDDTScorer.weighted_lddt>`,
+  :attr:`lDDT score of oligomer <ost.mol.alg.qsscoring.OligoLDDTScorer.oligo_lddt>`).
+
+.. note ::
+
+  The action relies on OST's :mod:`~ost.mol.alg.qsscoring` module and has the
+  same requirements on your OST installation (needs compound library, ClustalW,
+  numpy and scipy).
+
+Details on the usage (output of ``ost compare-structures --help``):

 .. code-block:: console

-  $ ost compare-structures [-h] [-v VERBOSITY] -m MODEL -r REFERENCE
+  usage: ost compare-structures [-h] -m MODEL -r REFERENCE [-v VERBOSITY]
                                [-o OUTPUT] [-d] [-ds DUMP_SUFFIX]
                                [-rs REFERENCE_SELECTION] [-ms MODEL_SELECTION]
-                           [-ca] [-ft] [-cl COMPOUND_LIBRARY] [-qs]
-                           [-c CHAIN_MAPPING [CHAIN_MAPPING ...]]
-                           [--qs-rmsd] [-rna]
-                           [--qs-max-mappings-extensive QS_MAX_MAPPINGS_EXTENSIVE]
-                           [-l] [-ir INCLUSION_RADIUS]
-                           [-ss SEQUENCE_SEPARATION] [-spr] [-ml]
+                                [-ca] [-ft] [-cl COMPOUND_LIBRARY] [-ml]
                                [-rm REMOVE [REMOVE ...]] [-ce] [-mn] [-sc]
                                [-p PARAMETER_FILE] [-bt BOND_TOLERANCE]
-                           [-at ANGLE_TOLERANCE] [-cc]
+                                [-at ANGLE_TOLERANCE]
+                                [-c CHAIN_MAPPING [CHAIN_MAPPING ...]]
+                                [--qs-max-mappings-extensive QS_MAX_MAPPINGS_EXTENSIVE]
+                                [-cc] [-rna] [-qs] [--qs-rmsd] [-l]
+                                [-ir INCLUSION_RADIUS] [-ss SEQUENCE_SEPARATION]
+                                [-spr]
+
+  Evaluate model structure against reference.
+
+  eg.
+
+    ost compare-structures \
+        --model <MODEL> \
+        --reference <REF> \
+        --output output.json \
+        --lddt \
+        --structural-checks \
+        --consistency-checks \
+        --molck \
+        --remove oxt hyd \
+        --map-nonstandard-residues
+
+  Here we describe how the parameters can be set to mimick a CAMEO evaluation
+  (as of August 2018).
+
+  CAMEO calls the lddt binary as follows:
+
+    lddt \
+        -p <PARAMETER FILE> \
+        -f \
+        -a 15 \
+        -b 15 \
+        -r 15 \
+        <MODEL> \
+        <REF>
+
+  Only model structures are "Molck-ed" in CAMEO. The call to molck is as follows:
+
+    molck \
+        --complib=<COMPOUND LIB> \
+        --rm=hyd,oxt,unk \
+        --fix-ele \
+        --map-nonstd \
+        --out=<OUTPUT> \
+        <FILEPATH>
+
+  To be as much compatible with with CAMEO as possible one should call
+  compare-structures as follows:
+
+    ost compare-structures \
+        --model <MODEL> \
+        --reference <REF> \
+        --output output.json \
+        --molck \
+        --remove oxt hyd unk \
+        --clean-element-column \
+        --map-nonstandard-residues \
+        --structural-checks \
+        --bond-tolerance 15.0 \
+        --angle-tolerance 15.0 \
+        --residue-number-alignment \
+        --consistency-checks \
+        --qs-score \
+        --lddt \
+        --inclusion-radius 15.0
+
+  optional arguments:
+    -h, --help            show this help message and exit
+
+  required arguments:
+    -m MODEL, --model MODEL
+                          Path to the model file.
+    -r REFERENCE, --reference REFERENCE
+                          Path to the reference file.
+
+  general arguments:
+    -v VERBOSITY, --verbosity VERBOSITY
+                          Set verbosity level. Defaults to 3.
+    -o OUTPUT, --output OUTPUT
+                          Output file name. The output will be saved as a JSON file.
+    -d, --dump-structures
+                          Dump cleaned structures used to calculate all the scores as
+                          PDB files using specified suffix. Files will be dumped to the
+                          same location as original files.
+    -ds DUMP_SUFFIX, --dump-suffix DUMP_SUFFIX
+                          Use this suffix to dump structures.
+                          Defaults to .compare.structures.pdb.
+    -rs REFERENCE_SELECTION, --reference-selection REFERENCE_SELECTION
+                          Selection performed on reference structures.
+    -ms MODEL_SELECTION, --model-selection MODEL_SELECTION
+                          Selection performed on model structures.
+    -ca, --c-alpha-only   Use C-alpha atoms only. Equivalent of calling the action with
+                          '--model-selection="aname=CA" --reference-selection="aname=CA"'
+                          options.
+    -ft, --fault-tolerant
+                          Fault tolerant parsing.
+    -cl COMPOUND_LIBRARY, --compound-library COMPOUND_LIBRARY
+                          Location of the compound library file (compounds.chemlib).
+                          If not provided, the following locations are searched in this
+                          order: 1. Working directory, 2. OpenStructure standard library
+                          location.
+
+  molecular check arguments:
+    -ml, --molck          Run molecular checker to clean up input.
+    -rm REMOVE [REMOVE ...], --remove REMOVE [REMOVE ...]
+                          Remove atoms and residues matching some criteria:
+                           * zeroocc - Remove atoms with zero occupancy
+                           * hyd - remove hydrogen atoms
+                           * oxt - remove terminal oxygens
+                           * nonstd - remove all residues not one of the 20
+                           * standard amino acids
+                           * unk - Remove unknown and atoms not following the
+                                   nomenclature
+                          Defaults to hyd.
+    -ce, --clean-element-column
+                          Clean up element column
+    -mn, --map-nonstandard-residues
+                          Map modified residues back to the parent amino acid, for
+                          example MSE -> MET, SEP -> SER.
+
+  structural check arguments:
+    -sc, --structural-checks
+                          Perform structural checks and filter input data.
+    -p PARAMETER_FILE, --parameter-file PARAMETER_FILE
+                          Location of the stereochemical parameter file
+                          (stereo_chemical_props.txt).
+                          If not provided, the following locations are searched in this
+                          order: 1. Working directory, 2. OpenStructure standard library
+                          location.
+    -bt BOND_TOLERANCE, --bond-tolerance BOND_TOLERANCE
+                          Tolerance in STD for bonds. Defaults to 12.
+    -at ANGLE_TOLERANCE, --angle-tolerance ANGLE_TOLERANCE
+                          Tolerance in STD for angles. Defaults to 12.
+
+  chain mapping arguments:
+    -c CHAIN_MAPPING [CHAIN_MAPPING ...], --chain-mapping CHAIN_MAPPING [CHAIN_MAPPING ...]
+                          Mapping of chains between the reference and the model.
+                          Each separate mapping consist of key:value pairs where key
+                          is the chain name in reference and value is the chain name in
+                          model.
+    --qs-max-mappings-extensive QS_MAX_MAPPINGS_EXTENSIVE
+                          Maximal number of chain mappings to test for 'extensive'
+                          chain mapping scheme which is used as a last resort if
+                          other schemes failed. The extensive chain mapping search
+                          must in the worst case check O(N!) possible mappings for
+                          complexes with N chains. Two octamers without symmetry
+                          would require 322560 mappings to be checked. To limit
+                          computations, no scores are computed if we try more than
+                          the maximal number of chain mappings. Defaults to 1000000.
+
+  sequence alignment arguments:
+    -cc, --consistency-checks
+                          Take consistency checks into account. By default residue name
+                          consistency between a model-reference pair would be checked
+                          but only a warning message will be displayed and the script
+                          will continue to calculate scores. If this flag is ON, checks
+                          will not be ignored and if the pair does not pass the test
+                          all the scores for that pair will be marked as a FAILURE.
+    -rna, --residue-number-alignment
+                          Make alignment based on residue number instead of using
+                          a global BLOSUM62-based alignment.
+
+  QS score arguments:
+    -qs, --qs-score       Calculate QS-score.
+    --qs-rmsd             Calculate CA RMSD between shared CA atoms of mapped chains.
+                          This uses a superposition using all mapped chains which
+                          minimizes the CA RMSD.
+
+  lDDT score arguments:
+    -l, --lddt            Calculate lDDT.
+    -ir INCLUSION_RADIUS, --inclusion-radius INCLUSION_RADIUS
+                          Distance inclusion radius for lDDT. Defaults to 15 A.
+    -ss SEQUENCE_SEPARATION, --sequence-separation SEQUENCE_SEPARATION
+                          Sequence separation. Only distances between residues whose
+                          separation is higher than the provided parameter are
+                          considered when computing the score. Defaults to 0.
+    -spr, --save-per-residue-scores
+

 By default the verbosity is set to 3 which will result in the informations
 being shown in the console. The result can be (optionally) saved as JSON file
@@ -49,37 +251,42 @@ The output file has following format:
                      "residue_names_consistent": <Are the residue numbers consistent? true or false>,
                      "mapping": {
                          "chain_mapping": <Mapping of chains eg. {"A": "B", "B": "A"}>,
-                          "chain_mapping_scheme": <Scheme used to get mapping, check mapping manually if "permissive" or "extensive">,
+                          "chain_mapping_scheme": <Scheme used to get mapping, check mapping manually
+                                                   if "permissive" or "extensive">,
                          "alignments": <list of chain-chain alignments in FASTA format>
                      }
                  }, 
-                  "lddt": {  # calculated when --lddt (-l) option is selected
+                  "lddt": {
+                      # calculated when --lddt (-l) option is selected
                      "oligo_lddt": {
                          "status": <SUCCESS or FAILURE>,
                          "error": <ERROR message if any>, 
-                          "global_score": <calculated oligomeric lddt score>, 
+                          "global_score": <calculated oligomeric lDDT score>
                      }, 
                      "weighted_lddt": {
                          "status": <SUCCESS or FAILURE>,
                          "error": <ERROR message if any>, 
-                          "global_score": <calculated weighted lddt score>, 
+                          "global_score": <calculated weighted lDDT score>
                      }, 
-                      "single_chain_lddt": [ # a list of chain-chain lDDts
+                      "single_chain_lddt": [
+                          # a list of chain-chain lDDTs
                          {
                              "status": <SUCCESS or FAILURE>,
                              "error": <ERROR message if any>, 
                              "reference_chain": <name of the chain in reference>, 
                              "model_chain": <name of the chain in model>
-                              "global_score": <calculated single-chain lddt score>, 
+                              "global_score": <calculated single-chain lDDT score>, 
                              "conserved_contacts": <number of conserved contacts between model and reference>,
-                              "total_contacts": <total number of contacts between model and reference>,
-                              "per_residue_scores": [  # per-residue lDDT scores - calculated when --save-per-residue-scores (-spr) option is selected
+                              "total_contacts": <total number of contacts in reference>,
+                              "per_residue_scores": [
+                                  # per-residue lDDT scores
+                                  # only calculated when --save-per-residue-scores (-spr) option is selected
                                  {
-                                      "total_contacts": <total number of contacts between model and reference>, 
                                      "residue_name": <three letter code of the residue in reference chain>,
+                                      "residue_number": <residue number in reference chain>,
                                      "lddt": <residue lDDT score>,
-                                      "conserved_contacts": <number of conserved contacts between model and reference for given residue>, 
-                                      "residue_number": <residue number in reference chain>
+                                      "conserved_contacts": <conserved_contacts for given residue>,
+                                      "total_contacts": <total_contacts for given residue>
                                  },
                                  .
                                  .
@@ -88,13 +295,13 @@ The output file has following format:
                          }
                      ]
                  },
-                  "qs_score": {   # calculated when --qs-score (-q) option is selected
+                  "qs_score": {
+                    # calculated when --qs-score (-q) option is selected
                    "status": <SUCCESS or FAILURE>,
                    "error": <ERROR message if any>,
                    "global_score": <Global QS-score>,
-                    "best_score": <Best QS-score>, 
+                    "best_score": <Best QS-score>
                  }
-
              }
          }
      }, 
@@ -109,8 +316,9 @@ Example usage:

 .. code-block:: console

-  $ curl https://www.cameo3d.org/static/data/modeling/2018.03.03/5X7J_B/bu_target_01.pdb > reference.pdb
-  $ curl https://www.cameo3d.org/static/data/modeling/2018.03.03/5X7J_B/servers/server11/oligo_model-1/superposed_oligo_model-1.pdb > model.pdb
+  $ CAMEO_TARGET_URL=https://www.cameo3d.org/static/data/modeling/2018.03.03/5X7J_B
+  $ curl $CAMEO_TARGET_URL/bu_target_01.pdb > reference.pdb
+  $ curl $CAMEO_TARGET_URL/servers/server11/oligo_model-1/superposed_oligo_model-1.pdb > model.pdb
  $ $OST_ROOT/bin/ost compare-structures \
        --model model.pdb --reference reference.pdb --output output.json \
        --qs-score --residue-number-alignment --lddt --structural-checks \
@@ -250,114 +458,133 @@ Example usage:
  ################################################################################
  Saving output into output.json

+This reads the model and reference file and calculates QS- and lDDT-scores
+between them. In the example above the output file looks as follows (FASTA
+alignments were cut in display here for readability):

-This reads the model and reference file and calculates QS-score between them.
-In the example above the output file looks as follows:
+..  code snippet to fix output.json generated above
+  import json
+  json_data = json.load(open("output.json"))
+  mapping = json_data["result"]["model.pdb"]["reference.pdb"]["info"]["mapping"]
+  new_alns = list()
+  for aln in mapping["alignments"]:
+    aln_lines = aln.splitlines()
+    aln_lines[1] = aln_lines[1][:20] + "..."
+    aln_lines[3] = aln_lines[3][:20] + "..."
+    new_alns.append("\n".join(aln_lines))
+  mapping["alignments"] = new_alns
+  json_data["options"]["parameter_file"] = "Path to stage/share/openstructure/stereo_chemical_props.txt"
+  json_data["options"]["compound_library"] = "Path to stage/share/openstructure/compounds.chemlib"
+  with open("output_fixed.json", "w") as outfile:
+    json.dump(json_data, outfile, indent=4, sort_keys=True)

-.. code-block:: python
+.. code-block:: json

  {
+      "options": {
+          "angle_tolerance": 15.0, 
+          "bond_tolerance": 15.0, 
+          "c_alpha_only": false, 
+          "chain_mapping": null, 
+          "clean_element_column": true, 
+          "compound_library": "Path to stage/share/openstructure/compounds.chemlib", 
+          "consistency_checks": true, 
+          "cwd": "/home/taurielg/GT/Code/ost/build", 
+          "dump_structures": false, 
+          "dump_suffix": ".compare.structures.pdb", 
+          "fault_tolerant": false, 
+          "inclusion_radius": 15.0, 
+          "lddt": true, 
+          "map_nonstandard_residues": true, 
+          "model": "model.pdb", 
+          "model_selection": "", 
+          "molck": true, 
+          "output": "output.json", 
+          "parameter_file": "Path to stage/share/openstructure/stereo_chemical_props.txt", 
+          "qs_max_mappings_extensive": 1000000, 
+          "qs_rmsd": false, 
+          "qs_score": true, 
+          "reference": "reference.pdb", 
+          "reference_selection": "", 
+          "remove": [
+              "oxt", 
+              "hyd", 
+              "unk"
+          ], 
+          "residue_number_alignment": true, 
+          "save_per_residue_scores": false, 
+          "sequence_separation": 0, 
+          "structural_checks": true, 
+          "verbosity": 3
+      }, 
      "result": {
          "model.pdb": {
              "reference.pdb": {
                  "info": {
-                      "residue_names_consistent": true, 
                      "mapping": {
+                          "alignments": [
+                              ">reference:A\n-PGLFLTLEGLDGSGKTTQA...\n>model:B\nMPGLFLTLEGLDGSGKTTQA...", 
+                              ">reference:B\n-PGLFLTLEGLDGSGKTTQA...\n>model:A\nMPGLFLTLEGLDGSGKTTQA..."
+                          ], 
                          "chain_mapping": {
                              "A": "B", 
                              "B": "A"
                          }, 
-                          "chain_mapping_scheme": "strict", 
-                          "alignments": [
-                              ">reference:A\n-PGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSL---QELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVR-------LGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP\n>model:B\nMPGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSLLLTQELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVRRPDRLEGLGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP", 
-                              ">reference:B\n-PGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSLLLTQELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVRRPDRLEGLGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP\n>model:A\nMPGLFLTLEGLDGSGKTTQARRLAAFLEAQGRPVLLTREPGGGLPEVRSLLLTQELSPEAEYLLFSADRAEHVRKVILPGLAAGKVVISDRYLDSSLAYQGYGRGLPLPWLREVAREATRGLKPRLTFLLDLPPEAALRRVRRPDRLEGLGLEFFRRVREGYLALARAEPGRFVVLDATLPEEEIARAIQAHLRPLLP"
-                          ]
-                      }
+                          "chain_mapping_scheme": "strict"
+                      }, 
+                      "residue_names_consistent": true
                  }, 
                  "lddt": {
                      "oligo_lddt": {
-                          "status": "SUCCESS", 
+                          "error": "", 
                          "global_score": 0.8025223275721413, 
-                          "error": ""
-                      }, 
-                      "weighted_lddt": {
-                          "status": "SUCCESS", 
-                          "global_score": 0.804789180710712, 
-                          "error": ""
+                          "status": "SUCCESS"
                      }, 
                      "single_chain_lddt": [
                          {
-                              "status": "SUCCESS", 
-                              "global_score": 0.8257459402084351, 
                              "conserved_contacts": 877834, 
-                              "reference_chain": "A", 
-                              "total_contacts": 1063080, 
                              "error": "", 
-                              "model_chain": "B"
+                              "global_score": 0.8257459402084351, 
+                              "model_chain": "B", 
+                              "reference_chain": "A", 
+                              "status": "SUCCESS", 
+                              "total_contacts": 1063080
                          }, 
                          {
-                              "status": "SUCCESS", 
-                              "global_score": 0.7854443788528442, 
                              "conserved_contacts": 904568, 
+                              "error": "", 
+                              "global_score": 0.7854443788528442, 
+                              "model_chain": "A", 
                              "reference_chain": "B", 
-                              "total_contacts": 1151664, 
+                              "status": "SUCCESS", 
+                              "total_contacts": 1151664
+                          }
+                      ], 
+                      "weighted_lddt": {
                          "error": "", 
-                              "model_chain": "A"
+                          "global_score": 0.804789180710712, 
+                          "status": "SUCCESS"
                      }
-                      ]
                  }, 
                  "qs_score": {
-                      "status": "SUCCESS", 
-                      "global_score": 0.8974384796108209, 
                      "best_score": 0.9022811630070536, 
-                      "error": ""
+                      "error": "", 
+                      "global_score": 0.8974384796108209, 
+                      "status": "SUCCESS"
                  }
              }
          }
-      }, 
-      "options": {
-          "reference": "reference.pdb", 
-          "structural_checks": true, 
-          "chain_mapping": null, 
-          "bond_tolerance": 15.0, 
-          "parameter_file": "Path to stage/share/openstructure/stereo_chemical_props.txt", 
-          "consistency_checks": true, 
-          "qs_score": true, 
-          "map_nonstandard_residues": true, 
-          "save_per_residue_scores": false, 
-          "fault_tolerant": false, 
-          "reference_selection": "", 
-          "qs_rmsd": false, 
-          "cwd": "CWD", 
-          "inclusion_radius": 15.0, 
-          "angle_tolerance": 15.0, 
-          "c_alpha_only": false, 
-          "clean_element_column": true, 
-          "dump_suffix": ".compare.structures.pdb", 
-          "compound_library": "Path to stage/share/openstructure/compounds.chemlib", 
-          "dump_structures": false, 
-          "residue_number_alignment": true, 
-          "verbosity": 3, 
-          "remove": [
-              "oxt", 
-              "hyd", 
-              "unk"
-          ], 
-          "molck": true, 
-          "sequence_separation": 0, 
-          "output": "output.json", 
-          "model": "model.pdb", 
-          "lddt": true, 
-          "model_selection": ""
      }
  }

-If all the structures are clean one can omit all the checking steps and
-calculate eg. QS-score directly:
+If all the structures are clean and have matching residue numbers, one can omit
+all the checking steps and calculate scores directly as here:

 .. code:: console

-  $ $OST_ROOT/bin/ost compare-structures --model model.pdb --reference reference.pdb --output output_qs.json --qs-score --residue-number-alignment
+  $ $OST_ROOT/bin/ost compare-structures \
+        --model model.pdb --reference reference.pdb --output output_qs.json \
+        --qs-score --residue-number-alignment

  ################################################################################
  Reading input files (fault_tolerant=False)
@@ -390,4 +617,3 @@ calculate eg. QS-score directly:
  QSscore reference.pdb, model.pdb: best: 0.90, global: 0.90
  ################################################################################
  Saving output into output_qs.json
-
--- a/modules/mol/alg/doc/molalg.rst
+++ b/modules/mol/alg/doc/molalg.rst
@@ -549,7 +549,8 @@ Local Distance Test scores (lDDT, DRMSD)

 .. class:: lDDTScorer(reference, model, settings)

-  Object to compute lDDT scores.
+  Object to compute lDDT scores using :func:`LocalDistDiffTest` as in
+  `Mariani et al. <https://dx.doi.org/10.1093/bioinformatics/btt473>`_.
  
  Example usage.