Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
O
openstructure
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container Registry
Model registry
Analyze
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
openstructure
Commits
258f724f
Unverified
Commit
258f724f
authored
1 year ago
by
Xavier Robin
Browse files
Options
Downloads
Patches
Plain Diff
refactor: use new CreateBU and SaveMMCIF functions
parent
b48621a3
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
actions/ost-compare-structures
+57
-83
57 additions, 83 deletions
actions/ost-compare-structures
with
57 additions
and
83 deletions
actions/ost-compare-structures
+
57
−
83
View file @
258f724f
...
@@ -80,11 +80,10 @@ ost compare-structures -m model.pdb -r reference.cif -c A:B B:A
...
@@ -80,11 +80,10 @@ ost compare-structures -m model.pdb -r reference.cif -c A:B B:A
import argparse
import argparse
import os
import os
import json
import json
import time
import sys
import traceback
import traceback
import math
import math
import ost
from ost import io
from ost import io
from ost.mol.alg import scoring
from ost.mol.alg import scoring
...
@@ -186,17 +185,17 @@ def _ParseArgs():
...
@@ -186,17 +185,17 @@ def _ParseArgs():
dest="dump_structures",
dest="dump_structures",
default=False,
default=False,
action="store_true",
action="store_true",
help=("Dump cleaned structures used to calculate all the scores as "
help=("Dump cleaned structures used to calculate all the scores as PDB"
"PDB files using specified suffix. Files will be dumped to the "
" or mmCIF files using specified suffix. Files will be dumped to"
"same location as original files."))
" the same location and in the same format as original files."))
parser.add_argument(
parser.add_argument(
"-ds",
"-ds",
"--dump-suffix",
"--dump-suffix",
dest="dump_suffix",
dest="dump_suffix",
default=".compare.structures.pdb",
default="_compare_structures",
help=("Use this suffix to dump structures.\n"
help=("Use this suffix to dump structures.\n"
"Defaults to .compare.structures.pdb."))
"Defaults to _compare_structures"))
parser.add_argument(
parser.add_argument(
"-ft",
"-ft",
...
@@ -534,50 +533,19 @@ def _RoundOrNone(num, decimals = 3):
...
@@ -534,50 +533,19 @@ def _RoundOrNone(num, decimals = 3):
return None
return None
return round(num, decimals)
return round(num, decimals)
def _Rename(ent):
def _AddSuffix(filename, dump_suffix):
"""Revert chain names to original names.
"""Add dump_suffix to the file name.
PDBize assigns chain name in order A,B,C,D... which does not allow to infer
the original chain name. We do a renaming here:
if there are two chains mapping to chain A the resulting
chain names will be: A and A2.
"""
"""
new_chain_names = list()
root, ext = os.path.splitext(filename)
chain_indices = list() # the chains where we actually change the name
if ext == ".gz":
suffix_indices = dict() # keep track of whats the current suffix index
root, ext2 = os.path.splitext(root)
# for each original chain name
ext = ext2 + ext
return root + dump_suffix + ext
for ch_idx, ch in enumerate(ent.chains):
if not ch.HasProp("original_name"):
def _GetStructureFormat(structure_path, sformat=None):
# pdbize doesnt set this property for chain names in ['_', '-']
"""Get the structure format and return it as "pdb" or "mmcif".
continue
original_name = ch.GetStringProp("original_name")
if original_name in new_chain_names:
new_name = original_name + str(suffix_indices[original_name])
new_chain_names.append(new_name)
suffix_indices[original_name] = suffix_indices[original_name] + 1
else:
new_chain_names.append(original_name)
suffix_indices[original_name] = 2
chain_indices.append(ch_idx)
editor = ent.EditXCS()
# rename to nonsense to avoid clashing chain names
for ch_idx in chain_indices:
editor.RenameChain(ent.chains[ch_idx], ent.chains[ch_idx].name+"_yolo")
# and do final renaming
for new_name, ch_idx in zip(new_chain_names, chain_indices):
editor.RenameChain(ent.chains[ch_idx], new_name)
def _LoadStructure(structure_path, sformat=None, fault_tolerant=False,
bu_idx=None):
"""Read OST entity either from mmCIF or PDB.
The returned structure has structure_path attached as structure name
"""
"""
if not os.path.exists(structure_path):
raise Exception(f"file not found: {structure_path}")
if sformat is None:
if sformat is None:
# Determine file format from suffix.
# Determine file format from suffix.
ext = structure_path.split(".")
ext = structure_path.split(".")
...
@@ -587,11 +555,26 @@ def _LoadStructure(structure_path, sformat=None, fault_tolerant=False,
...
@@ -587,11 +555,26 @@ def _LoadStructure(structure_path, sformat=None, fault_tolerant=False,
raise Exception(f"Could not determine format of file "
raise Exception(f"Could not determine format of file "
f"{structure_path}.")
f"{structure_path}.")
sformat = ext[-1].lower()
sformat = ext[-1].lower()
if sformat in ["mmcif", "cif"]:
return "mmcif"
elif sformat == "pdb":
return sformat
else:
raise Exception(f"Unknown/unsupported file format found for "
f"file {structure_path}.")
def _LoadStructure(structure_path, sformat, fault_tolerant, bu_idx):
"""Read OST entity either from mmCIF or PDB.
The returned structure has structure_path attached as structure name
"""
if not os.path.exists(structure_path):
raise Exception(f"file not found: {structure_path}")
# increase loglevel, as we would pollute the info log with weird stuff
# increase loglevel, as we would pollute the info log with weird stuff
ost.PushVerbosityLevel(ost.LogLevel.Error)
ost.PushVerbosityLevel(ost.LogLevel.Error)
# Load the structure
# Load the structure
if sformat in ["mmcif", "cif"]:
if sformat == "mmcif":
if bu_idx is not None:
if bu_idx is not None:
cif_entity, cif_seqres, cif_info = \
cif_entity, cif_seqres, cif_info = \
io.LoadMMCIF(structure_path, info=True, seqres=True,
io.LoadMMCIF(structure_path, info=True, seqres=True,
...
@@ -600,28 +583,31 @@ def _LoadStructure(structure_path, sformat=None, fault_tolerant=False,
...
@@ -600,28 +583,31 @@ def _LoadStructure(structure_path, sformat=None, fault_tolerant=False,
raise RuntimeError(f"Invalid biounit index - requested {bu_idx} "
raise RuntimeError(f"Invalid biounit index - requested {bu_idx} "
f"must be < {len(cif_info.biounits)}.")
f"must be < {len(cif_info.biounits)}.")
biounit = cif_info.biounits[bu_idx]
biounit = cif_info.biounits[bu_idx]
entity = biounit.PDBize(cif_entity, min_polymer_size=0)
entity = ost.mol.alg.CreateBU(cif_entity, biounit)
if not entity.IsValid():
if not entity.IsValid():
raise IOError(
raise IOError(
"Provided file does not contain valid entity.")
"Provided file does not contain valid entity.")
_Rename(entity)
else:
else:
entity = io.LoadMMCIF(structure_path,
entity = io.LoadMMCIF(structure_path,
fault_tolerant = fault_tolerant)
fault_tolerant = fault_tolerant)
if len(entity.residues) == 0:
if len(entity.residues) == 0:
raise Exception(f"No residues found in file: {structure_path}")
raise Exception(f"No residues found in file: {structure_path}")
elif sformat == "pdb":
else:
entity = io.LoadPDB(structure_path, fault_tolerant = fault_tolerant)
entity = io.LoadPDB(structure_path, fault_tolerant = fault_tolerant)
if len(entity.residues) == 0:
if len(entity.residues) == 0:
raise Exception(f"No residues found in file: {structure_path}")
raise Exception(f"No residues found in file: {structure_path}")
else:
raise Exception(f"Unknown/ unsupported file extension found for "
f"file {structure_path}.")
# restore old loglevel and return
# restore old loglevel and return
ost.PopVerbosityLevel()
ost.PopVerbosityLevel()
entity.SetName(structure_path)
entity.SetName(structure_path)
return entity
return entity
def _DumpStructure(entity, structure_path, sformat):
if sformat == "mmcif":
io.SaveMMCIF(entity, structure_path)
else:
io.SavePDB(entity, structure_path)
def _AlnToFastaStr(aln):
def _AlnToFastaStr(aln):
""" Returns alignment as fasta formatted string
""" Returns alignment as fasta formatted string
"""
"""
...
@@ -714,7 +700,7 @@ def _GetAlignedResidues(aln):
...
@@ -714,7 +700,7 @@ def _GetAlignedResidues(aln):
"reference": ref_dct})
"reference": ref_dct})
return aligned_residues
return aligned_residues
def _Process(model, reference, args):
def _Process(model, reference, args, model_format, reference_format):
mapping = None
mapping = None
if args.chain_mapping is not None:
if args.chain_mapping is not None:
...
@@ -855,32 +841,16 @@ def _Process(model, reference, args):
...
@@ -855,32 +841,16 @@ def _Process(model, reference, args):
out["usalign_mapping"] = scorer.usalign_mapping
out["usalign_mapping"] = scorer.usalign_mapping
if args.dump_structures:
if args.dump_structures:
try:
# Dump model
io.SavePDB(scorer.model, model.GetName() + args.dump_suffix)
model_dump_filename = _AddSuffix(model.GetName(), args.dump_suffix)
except Exception as e:
_DumpStructure(model, model_dump_filename, model_format)
if "single-letter" in str(e) and args.model_biounit is not None:
# Dump reference
raise RuntimeError("Failed to dump processed model. PDB "
reference_dump_filename = _AddSuffix(reference.GetName(), args.dump_suffix)
"format only supports single character "
_DumpStructure(reference, reference_dump_filename, reference_format)
"chain names. This is likely the result of "
"chain renaming when constructing a user "
"specified biounit. Dumping structures "
"fails in this case.")
else:
raise
try:
io.SavePDB(scorer.target, reference.GetName() + args.dump_suffix)
except Exception as e:
if "single-letter" in str(e) and args.reference_biounit is not None:
raise RuntimeError("Failed to dump processed reference. PDB "
"format only supports single character "
"chain names. This is likely the result of "
"chain renaming when constructing a user "
"specified biounit. Dumping structures "
"fails in this case.")
else:
raise
return out
return out
def _Main():
def _Main():
args = _ParseArgs()
args = _ParseArgs()
...
@@ -890,15 +860,19 @@ def _Main():
...
@@ -890,15 +860,19 @@ def _Main():
raise RuntimeError("Only support CAD score when residue numbers in "
raise RuntimeError("Only support CAD score when residue numbers in "
"model and reference match. Use -rna flag if "
"model and reference match. Use -rna flag if "
"this is the case.")
"this is the case.")
reference_format = _GetStructureFormat(args.reference,
sformat=args.reference_format)
reference = _LoadStructure(args.reference,
reference = _LoadStructure(args.reference,
sformat=args.reference_format,
sformat=reference_format,
bu_idx=args.reference_biounit,
bu_idx=args.reference_biounit,
fault_tolerant = args.fault_tolerant)
fault_tolerant = args.fault_tolerant)
model_format = _GetStructureFormat(args.model,
sformat=args.model_format)
model = _LoadStructure(args.model,
model = _LoadStructure(args.model,
sformat=args.model_format,
sformat=model_format,
bu_idx=args.model_biounit,
bu_idx=args.model_biounit,
fault_tolerant = args.fault_tolerant)
fault_tolerant = args.fault_tolerant)
out = _Process(model, reference, args)
out = _Process(model, reference, args, model_format, reference_format)
# append input arguments
# append input arguments
out["model"] = args.model
out["model"] = args.model
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment