Commit a7bc08a2 authored by Bienchen's avatar Bienchen
Browse files

Script to run ProModII, removing scratch directory

parent 93be833b
"""Run ProModII as modelling engine"""
import os
import time
import subprocess
from ost import io
from ost import seq
from ost import mol
import promod3
def renumber_chains(seqres, ent, offset):
"""Postprocessing step to make sure, that all chains are correctly numbered.
Removed parts at the beginning or end are allowed.
But if there are modelling gaps, the numbering gets messed up!
"""
edx = ent.EditXCS()
chn = ent.chains[0]
rcaln = seq.alg.AlignToSEQRES(chn, seqres[offset[chn.name] :])
aligned_sequence = str(rcaln.FindSequence("atoms"))
start = -1
for i, aligned_aa in enumerate(aligned_sequence):
if aligned_aa != "-":
start = i + 1
break
if start != -1:
start += offset[chn.name]
edx.RenumberChain(chn, start, False)
else:
raise RuntimeError("Failed to renumber chain!")
def rename_chains(old_ent, names):
"""Fix ProMod's (sometimes) weird naming of chains."""
new_ent = mol.CreateEntity()
edx = new_ent.EditXCS()
if len(names) != old_ent.chain_count:
raise RuntimeError(
"Cannot rename model's chains: "
"different number of chains from template"
)
for chn, name in zip(old_ent.chains, names):
edx.InsertChain(name, chn, deep=True)
edx.UpdateICS()
io.processor.Process(new_ent)
return new_ent
def write_batch_file(scratch_path, target_sequence):
"""Create the ProModII batch file"""
with open(os.path.join(scratch_path, "model.batch.1"), "w") as btch_fh:
btch_fh.write("#FOLDFITMODE\n")
btch_fh.write("%raw-model.pdb\n")
btch_fh.write("#RENUMBER 1\n")
btch_fh.write(">TARGET\n")
btch_fh.write("%s;\n" % target_sequence)
btch_fh.write(".\n")
btch_fh.write("%model.align.submit.fasta\n")
def write_alignment_file(scratch_path, target_sequence, chain):
"""Create the ProModII alignment file"""
t_offsets = dict()
with open(
os.path.join(scratch_path, "model.align.submit.fasta"), "w"
) as aln_fh:
aln_fh.write("#SPDBV Foldfit-like Alignment\n")
aln_fh.write(">Target\n")
aln_fh.write(">EXPDB\n")
aln_fh.write("*\n")
tpl_seq = seq.CreateSequence(
str(chain.name), "-" * len(target_sequence)
)
for res in chain.residues:
tpl_seq[res.number.num - 1] = res.one_letter_code
for i, trg in enumerate(target_sequence):
aln_fh.write(" %s" % trg)
if tpl_seq[i] != "-":
aln_fh.write("%s\n" % tpl_seq[i])
else:
aln_fh.write(" \n")
t_offsets[chain.name] = chain.residues[0].number.num - 1
aln_fh.write("*\n")
return t_offsets
def execute_promod(model_path, scratch_path, trg_seq, model_ost_ent):
"""Create a protein model using ProModII, including all the preparations.
Calling ProModII requires some preparation. What we're interested in is raw
modelling time by ProModII. This is returned by the execute_promod function.
"""
write_batch_file(scratch_path, trg_seq)
t_offsets = write_alignment_file(
scratch_path, trg_seq, model_ost_ent.chains[0]
)
# execute
poh = open(os.path.join(scratch_path, "promod.stdout"), "w")
peh = open(os.path.join(scratch_path, "promod.stderr"), "w")
cwd = os.getcwd()
os.chdir(scratch_path)
io.SavePDB(model_ost_ent, "raw-model.pdb")
os.environ["SPDBV_USRSTUFF"] = "."
os.environ["SPDBV_TEMP"] = "."
os.environ["SPDBV_BASE"] = os.getenv("SPDBV_BASE")
os.environ["SPDBV_DOWNLOAD"] = scratch_path
os.environ["EXPDB"] = "/scicore/data/databases/EXPDB"
command = (
os.path.join(os.getenv("EBROOTPROMODII"), "bin", "promod")
+ " model.batch.1"
)
pmd_model_name = "model.batch.1_E.pdb"
pm_modelling_time = 0.0
try:
pm_modelling_start_time = time.time()
chld_ps = subprocess.Popen(command, shell=True, stdout=poh, stderr=peh)
exit_code = chld_ps.wait()
pm_modelling_time = time.time() - pm_modelling_start_time
if exit_code != 0:
raise RuntimeError(
"ProMod did not finish properly, exit code: %d" % exit_code
)
if not os.path.exists(pmd_model_name):
raise RuntimeError(
"ProMod did not produce a model file (%s), " % pmd_model_name
+ "most likely it did not finish properly"
)
except Exception as exc:
os.chdir(cwd)
poh.close()
peh.close()
raise RuntimeError("ProMod failed: %s" % str(exc)) from exc
# try to load the model before copying it to model.pdb. There are a few cases
# where ProMod produces invalid PDB files. We should catch such errors and
# report a failure.
try:
pmd_model = io.LoadPDB(pmd_model_name)
# In case of oligo the chain are renamed by promod from A to Z
# but the order is the same as in the raw model.
# We need the original template names to preserve the mapping
# of targets.
pmd_model = rename_chains(pmd_model, [model_ost_ent.chains[0].name])
# In case of oligomers, promod assigns ongoing residue numbers.
# The first residue of the second chain will be numbered with the number
# of the last residue of the first chain + 1. We have to renumber the
# whole thing according to the seqres.
renumber_chains(trg_seq, pmd_model, t_offsets)
except Exception as exc:
os.chdir(cwd)
poh.close()
peh.close()
raise RuntimeError(
"ProMod failed: Could not read model file: %s" % str(exc)
) from exc
try:
os.remove(pmd_model_name)
os.chdir(cwd)
io.SavePDB(pmd_model, model_path)
except Exception as exc:
os.chdir(cwd)
poh.close()
peh.close()
raise RuntimeError(
"ProMod failed: (renaming model file) %s" % str(exc)
) from exc
os.chdir(cwd)
poh.close()
peh.close()
return pm_modelling_time
if __name__ == "__main__":
in_dir = "cameo_benchmark" # pylint: disable=invalid-name
out_dir = "promodII_models" # pylint: disable=invalid-name
scratch_dir = "scratch" # pylint: disable=invalid-name
if not os.path.exists(out_dir):
os.makedirs(out_dir)
if not os.path.exists(scratch_dir):
os.makedirs(scratch_dir)
targets = set(
f[:6] for f in os.listdir(in_dir)
) # pylint: disable=invalid-name
sb_i = 0 # pylint: disable=invalid-name
modelling_time = 0.0 # pylint: disable=invalid-name
for t in sorted(targets):
print("TARGET", t)
tpl_path = os.path.join(in_dir, t + "_tpl.pdb")
aln_path = os.path.join(in_dir, t + "_aln.fasta")
out_path = os.path.join(out_dir, t + ".pdb")
if os.path.exists(tpl_path) and os.path.exists(aln_path):
aln = io.LoadAlignment(aln_path)
template = io.LoadPDB(tpl_path).CreateFullView()
aln.AttachView(1, template)
rawmodel = promod3.modelling.BuildRawModel(aln)
modelling_time += execute_promod(
out_path,
scratch_dir,
aln.GetSequence(0).GetGaplessString(),
rawmodel.model,
)
sb_i += 1
if sb_i == 1:
break
print("full modelling time: ", modelling_time)
>P1;template
structureX:template:7:A:131:A:undefined:undefined:-1.0:-1.0
AKFHVEGEVYCNVCHSRNLINELSERMAGAQVQLDCKD-DSKKVIYSIGGETDQDGVYRLPVVGYHED--CEIKLVKSSRPDCSEIPKLAKGTIQTSKVDLSKNTTITEKTRHVKPLSFRAKTDAPGC*
>P1;target
sequence:target:1:A:123:A: : :0.0:0.0
SQFYIQGQVYCDTCRA-RFITELSEFIPGAGVRLQCKDGENGKITFTEVGYTRAEGLYSMLIERDHKNEFCEITLLSSSRKDCDEIP--IEGWVKPS-LKFMLNTVNG-TTRTINPLGFFKKEALPKC*
\ No newline at end of file
{
"options": {
"angle_tolerance": 15.0,
"bond_tolerance": 15.0,
"c_alpha_only": false,
"chain_mapping": null,
"clean_element_column": true,
"compound_library": "/home/schdaude/prog/ost/build/stage/share/openstructure/compounds.chemlib",
"consistency_checks": true,
"cwd": "/home/schdaude/workspace/promod3_pipeline_benchmark/modelling",
"dump_structures": false,
"dump_suffix": ".compare.structures.pdb",
"fault_tolerant": false,
"inclusion_radius": 15.0,
"lddt": true,
"map_nonstandard_residues": true,
"model": "promod_models/6XWX_E.pdb",
"model_selection": "",
"molck": true,
"output": "scratch/scores.json",
"parameter_file": "/home/schdaude/prog/ost/build/stage/share/openstructure/stereo_chemical_props.txt",
"qs_max_mappings_extensive": 1000000,
"qs_rmsd": false,
"qs_score": false,
"reference": "cameo_benchmark/6XWX_E.pdb",
"reference_selection": "",
"remove": [
"oxt",
"hyd",
"unk",
"nonstd"
],
"residue_number_alignment": true,
"save_per_residue_scores": false,
"sequence_separation": 0,
"structural_checks": true,
"verbosity": 3
},
"result": {
"6XWX_E.pdb": {
"6XWX_E.pdb": {
"info": {
"mapping": {
"alignments": [
">reference:E\n--------SAGLEVLFQGPMEDGEVNDVVHPQVRAHINSLVSALGGISIDD--GYKLGDDALEVLRDLKKWIRFYDEKTNRMDVARCLAEANIVSTDLLHILALWTPNENSNKYKARIALACFELMVPLTWPIEKDRETMTINHHRHIPVLQLAQLGYKRAIINYDAAPILSTAVRVALPAMAMPIGERTARDQGIIKLILYFLRNIAMITPPP----------ISRSALIDAFSYQDIFLTLLTIASNMGEDFRTEDVIVMEIIFHLVKRVDPKGQQLGSFVSDFLDSGFNPLFSHIRKSLEREAPHVLHYHQSQFFYLVAWFLEAERARRSSFNLIASVLTQEMFIALNRALDRAYGDKDWRLLTSAMRCFTQILLTVQEMFDSGNDEDQEIADNILSRLFYEESTHDAVANIVRTYKDQGFEYLDACTELAHTFLRILEAYSKQNV-------SADDEKMAEKTSQERKFDFKRFAARFTPQGVVDTFVTFTKYYRDLDDSQLKRAHRYFYRVAFKQEMSVMLFRLDIIHLFYNMIKGPEPLDKNSPMYKEWEELVRQILKRCIRKLEERPALFTEILFSKINSTAYYLE---\n>model:A\n----------------------------VHPQVRAHINSLVSALGGISIDDDGGYKLGDDALEVLRDLKKWIRFYDEKTNRMDVARCLAEANIVSTDLLHILALWTPNENSNKYKARIALACFELMVPLTWPIEKDRETMTINHHRHIPVLQLAQLGYKRAIINYDAAPILSTAVRVALPAMAMPIGERTARDQGIIKLILYFLRNIAMITPPPGVKYEGDESQISRSALIDAFSYQDIFLTLLTIASNMGEDFRTEDVIVMEIIFHLVKRVDPKGQQLGSFVSDFLDSGFNPLFSHIRKSLEREAPHVLHYHQSQFFYLVAWFLEAERARRSSFNLIASVLTQEMFIALNRALDRAYGDKDWRLLTSAMRCFTQILLTVQEMFDSGNDEDQEIADNILSRLFYEESTHDAVANIVRTYKDQGFEYLDACTELAHTFLRILEAYSKQNVDLQVRSGSADDEKMAEKTSQERKFDFKRFAARFTPQGVVDTFVTFTKYYRDLDDSQLKRAHRYFYRVAFKQEMSVMLFRLDIIHLFYNMIKGPEPLDKNSPMYKEWEELVRQILKRCIRKLEERPALFTEILFSKINSTAYYLEYGF"
],
"chain_mapping": {
"E": "A"
},
"chain_mapping_scheme": "strict"
},
"residue_names_consistent": true
},
"lddt": {
"oligo_lddt": {
"error": "",
"global_score": 0.6644092076003631,
"status": "SUCCESS"
},
"single_chain_lddt": [
{
"conserved_contacts": 2383087,
"error": "",
"global_score": 0.6644092202186584,
"model_chain": "A",
"reference_chain": "E",
"status": "SUCCESS",
"total_contacts": 3586776
}
],
"weighted_lddt": {
"error": "",
"global_score": 0.6644092202186584,
"status": "SUCCESS"
}
}
}
}
}
}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
# Conjugate gradients optimization
# Step Current energy Av shift Mx shift Funcs Gradient
0 60563.70312 0.0000 0.0000 1 2.877e+06
10 181.68314 0.1020 0.5631 21 855.313
20 52.16914 0.0536 0.2493 41 69.047
30 29.14375 0.0396 0.1810 61 28.0214
40 20.53514 0.0291 0.2205 81 11.0789
50 15.92380 0.0208 0.1445 101 6.88604
60 13.02964 0.0185 0.1411 121 4.19292
70 11.20910 0.0175 0.1679 141 2.54498
80 10.14999 0.0126 0.0735 161 1.55819
90 9.34199 0.0134 0.0628 181 1.38211
# Conjugate gradients optimization
# Step Current energy Av shift Mx shift Funcs Gradient
0 94.64766 0.0000 0.0000 1 103.262
10 91.07736 0.0070 0.0535 21 61.5848
20 87.97805 0.0138 0.0870 42 161.941
30 85.36501 0.0058 0.0838 62 51.2469
40 82.95379 0.0060 0.0686 82 35.9748
50 79.40267 0.0134 0.1090 102 60.7359
60 76.71658 0.0102 0.0468 122 54.3674
67 75.58187 0.0008 0.0072 138 12.9285
# Conjugate gradients optimization
# Step Current energy Av shift Mx shift Funcs Gradient
0 391.65182 0.0000 0.0000 1 350.417
1 391.41476 0.0006 0.0050 3 380.809
# Conjugate gradients optimization
# Step Current energy Av shift Mx shift Funcs Gradient
0 877.50836 0.0000 0.0000 1 2556.4
1 876.73138 0.0006 0.0047 3 2747.21
# Conjugate gradients optimization
# Step Current energy Av shift Mx shift Funcs Gradient
0 1134.88611 0.0000 0.0000 1 7705.52
10 1000.83966 0.0086 0.0657 21 13771.4
20 932.80261 0.0080 0.0699 41 9795.58
30 893.04047 0.0086 0.0779 61 5054.65
40 868.82751 0.0084 0.0573 81 3898.28
47 856.21259 0.0020 0.0096 96 2384.02
# Conjugate gradients optimization
# Step Current energy Av shift Mx shift Funcs Gradient
0 1025.33899 0.0000 0.0000 1 6761.96
10 955.29193 0.0061 0.1173 21 9485.02
20 914.94275 0.0065 0.1145 41 6422.65
30 856.42963 0.0053 0.1583 61 10105.5
40 830.01257 0.0025 0.0364 81 2799.31
50 817.10297 0.0032 0.0338 101 1919.16
60 807.19653 0.0057 0.0463 121 1596.48
70 793.37280 0.0111 0.0689 141 3893.75
80 776.24243 0.0119 0.0759 161 3799.31
90 760.89026 0.0083 0.0539 181 2168.79
# Conjugate gradients optimization
# Step Current energy Av shift Mx shift Funcs Gradient
0 1485.92944 0.0000 0.0000 1 582485
10 1021.62262 0.0109 0.0972 21 19963.1
20 948.96161 0.0081 0.0712 42 7361.55
30 914.27484 0.0050 0.0274 62 3808.09
40 895.87482 0.0062 0.0378 82 2530.7
50 884.23193 0.0055 0.0316 102 1984.64
60 876.04510 0.0041 0.0219 122 1637.44
62 874.62701 0.0009 0.0062 127 1183.02
# Conjugate gradients optimization
# Step Current energy Av shift Mx shift Funcs Gradient
0 1855.78113 0.0000 0.0000 1 154868
10 1283.78052 0.0090 0.1857 21 54797.2
20 1131.59827 0.0048 0.0610 41 16376.7
30 1063.99109 0.0024 0.0281 62 5617.17
40 1035.85352 0.0043 0.0585 82 4426.48
50 1014.56659 0.0010 0.0117 103 3058.38
51 1013.75696 0.0006 0.0051 105 1040.79
# Conjugate gradients optimization
# Step Current energy Av shift Mx shift Funcs Gradient
0 3462.55737 0.0000 0.0000 1 1.8508e+06
10 1309.17322 0.0050 0.0948 21 24077.4
20 1231.13525 0.0050 0.0362 41 8814.89
30 1195.27966 0.0064 0.0337 61 6376.05
40 1166.43323 0.0078 0.0418 81 4807.13
50 1141.88501 0.0076 0.0427 101 4287.35
60 1123.96899 0.0078 0.0435 121 3046.22
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# MTH NRNG 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 1 2 0.010 0.010 0.010 0.010 0.000 0.000 0.000 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.000 0.000 0.000 0.010
2 1 4 0.100 0.100 0.100 0.100 0.000 0.000 0.000 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.000 0.000 0.000 0.100
3 1 6 0.500 0.500 0.500 0.500 0.000 0.000 0.000 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.000 0.000 0.000 0.500
4 1 10 1.000 1.000 1.000 1.000 0.000 0.000 0.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000 0.000 0.000 1.000
5 1 20 1.000 1.000 1.000 1.000 0.000 0.000 0.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000 0.000 0.000 1.000
6 1 30 1.000 1.000 1.000 1.000 0.000 0.000 0.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000 0.000 0.000 1.000
7 1 50 1.000 1.000 1.000 1.000 0.010 0.010 0.010 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 0.010 0.010 0.010 1.000
8 1 80 1.000 1.000 1.000 1.000 0.100 0.100 0.100 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 0.100 0.100 0.100 1.000
9 1 120 1.000 1.000 1.000 1.000 0.500 0.500 0.500 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 0.500 0.500 0.500 1.000
10 1 200 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment