From 8ee6563cb2a05e5d224bf137d003f3c2dc5c2868 Mon Sep 17 00:00:00 2001 From: Gabriel Studer <gabriel.studer@unibas.ch> Date: Tue, 9 May 2023 18:07:49 +0200 Subject: [PATCH] binding to call external USalign executable --- modules/bindings/doc/tmtools.rst | 41 ++++++---- modules/bindings/pymod/tmtools.py | 127 ++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 17 deletions(-) diff --git a/modules/bindings/doc/tmtools.rst b/modules/bindings/doc/tmtools.rst index 186026653..cc08e010d 100644 --- a/modules/bindings/doc/tmtools.rst +++ b/modules/bindings/doc/tmtools.rst @@ -5,7 +5,7 @@ :synopsis: Sequence dependent and independent structure superposition The :mod:`~ost.bindings.tmtools` module provides access to the structural -superposition programs TMscore, Tmalign and MMalign developed by Y. Zhang +superposition programs TMscore and Tmalign developed by Y. Zhang and J. Skolnick. These programs superpose a model onto a reference structure, using the positions of the Calpha atoms only. While at their core, these programs essentially use the same algorithm, they differ on how the Calphas are @@ -17,14 +17,15 @@ Citation: Yang Zhang and Jeffrey Skolnick, Proteins 2004 57: 702-710 Y. Zhang and J. Skolnick, Nucl. Acids Res. 2005 33, 2302-9 -Besides using the standalone TM-align program, ost also provides a wrapper +Besides using the standalone TM-align program, ost also provides wrappers around USalign as published in: Chengxin Zhang, Morgan Shine, Anna Marie Pyle, Yang Zhang (2022) Nat Methods -The advantage is that no intermediate files must be generated, a wrapper on the -c++ layer is used instead. +USalign can be used as external standalone tool. Alternatively, ost allows to +directly inject structural data in the USAlign c++ layer. The advantage of that +is that no intermediate files must be generated. Distance measures used by TMscore @@ -77,15 +78,27 @@ Usage of TMscore .. autoclass:: ost.bindings.tmtools.TMScoreResult +Usage of USalign +-------------------------------------------------------------------------------- + +For higher order complexes, ost provides access to USalign. This corresponds to +calling USalign with the preferred way of comparing full biounits: + +.. code-block:: bash + + USalign mdl.pdb ref.pdb -mm 1 -ter 0 + +.. autofunction:: ost.bindings.tmtools.USAlign -TMalign C++ wrapper + +C++ wrappers -------------------------------------------------------------------------------- .. currentmodule:: ost.bindings -Instead of calling the TMalign executable, ost also provides a wrapper around -its C++ implementation. The advantage is that no intermediate files need to be -generated in order to call the executable. +Instead of calling the external executables, ost also provides a wrapper around +the USalign c++ implementation which is shipped with the ost source code. +The advantage is that no intermediate files need to be generated. .. code-block:: python @@ -118,8 +131,8 @@ generated in order to call the executable. .. method:: WrappedTMAlign(chain1, chain2, [fast=False]) - Takes two chain views and runs TMalign with *chain2* as reference. - The positions and sequences are directly extracted from the chain + Takes two chain views and runs TMalign from USAlign with *chain2* as + reference. The positions and sequences are directly extracted from the chain residues for every residue that fulfills: * peptide linking and valid CA atom OR nucleotide linking and valid C3' @@ -161,13 +174,7 @@ generated in order to call the executable. respectively are not consistent in size. For higher order complexes, ost provides access to the MMalign functionality -from USalign. This corresponds to calling USalign with the preferred way of -comparing full biounits: - -.. code-block:: bash - - USalign mdl.pdb ref.pdb -mm 1 -ter 0 - +from USalign. .. class:: MMAlignResult(rmsd, tm_score, transform, aligned_length, alignments,\ ent1_mapped_chains, ent2_mapped_chains) diff --git a/modules/bindings/pymod/tmtools.py b/modules/bindings/pymod/tmtools.py index d58691c04..36f7e9cf9 100644 --- a/modules/bindings/pymod/tmtools.py +++ b/modules/bindings/pymod/tmtools.py @@ -73,6 +73,90 @@ def _ParseTmAlign(lines,lines_matrix): return ost.bindings.TMAlignResult(rmsd, tm_score, tm_score_swapped, aln_length, tf, alignment) +def _ParseUSAlign(lines,lines_matrix): + + # stuff that is immediately parsed + rmsd = None + tm_score = None + tm_score_swapped = None + aligned_length = None + + # first goes into intermediate data structures + aln_data = list() + mapping_data1 = list() + mapping_data2 = list() + in_aln = False + + for line in lines: + if in_aln: + if len(line.strip()) == 0: + in_aln = False + else: + aln_data.append(line.strip('*')) + elif line.startswith("Name of Structure_1:"): + tmp = [item.strip() for item in line.split()[3].split(':')[1:]] + for item in tmp: + if len(item) > 0: + mapping_data1.append(item.split(',')[1]) + else: + mapping_data1.append("") + elif line.startswith("Name of Structure_2:"): + tmp = [item.strip() for item in line.split()[3].split(':')[1:]] + for item in tmp: + if len(item) > 0: + mapping_data2.append(item.split(',')[1]) + else: + mapping_data2.append("") + elif line.startswith("Aligned length="): + data = [item.strip() for item in line.split(',')] + for item in data: + if item.startswith("Aligned length="): + aligned_length = int(item.split("=")[1]) + elif item.startswith("RMSD="): + rmsd = float(item.split("=")[1]) + elif line.startswith("TM-score="): + if "(normalized by length of Structure_1" in line: + tm_score_swapped = float(line.split('(')[0].split('=')[1].strip()) + elif "(normalized by length of Structure_2" in line: + tm_score = float(line.split('(')[0].split('=')[1].strip()) + elif line.startswith("(\":\" denotes residue pairs of"): + in_aln = True + + assert(len(aln_data)==3) + aln_sequences1 = aln_data[0].split('*') + aln_sequences2 = aln_data[2].split('*') + + # do mapping/aln data + alns = ost.seq.AlignmentList() + ent1_mapped_chains = ost.StringList() + ent2_mapped_chains = ost.StringList() + assert(len(mapping_data1) == len(mapping_data2)) + assert(len(aln_sequences1) == len(aln_sequences2)) + assert(len(mapping_data1) == len(aln_sequences1)) + for a, b, c, d in zip(mapping_data1, mapping_data2, + aln_sequences1, aln_sequences2): + if len(a) > 0 and len(b) > 0: + ent1_mapped_chains.append(a) + ent2_mapped_chains.append(b) + assert(len(c) == len(d)) + aln = seq.CreateAlignment() + aln.AddSequence(seq.CreateSequence(a, c)) + aln.AddSequence(seq.CreateSequence(b, d)) + alns.append(aln) + + # parse transformation matrix + tf1=[float(i.strip()) for i in lines_matrix[2].split()[1:]] + tf2=[float(i.strip()) for i in lines_matrix[3].split()[1:]] + tf3=[float(i.strip()) for i in lines_matrix[4].split()[1:]] + mat = geom.Mat4(tf1[0], tf1[1], tf1[2], tf1[3], + tf2[0], tf2[1], tf2[2], tf2[3], + tf3[0], tf3[1], tf3[2], tf3[3], + 0.0, 0.0, 0.0, 1.0) + + return ost.bindings.MMAlignResult(rmsd, tm_score, tm_score_swapped, + aligned_length, mat, alns, + ent1_mapped_chains, ent2_mapped_chains) + def _RunTmAlign(tmalign, tmp_dir): model1_filename=os.path.join(tmp_dir, 'model01.pdb') model2_filename=os.path.join(tmp_dir, 'model02.pdb') @@ -93,6 +177,21 @@ def _RunTmAlign(tmalign, tmp_dir): matrix_file.close() return _ParseTmAlign(lines,lines_matrix) +def _RunUSAlign(usalign, tmp_dir): + model1_filename=os.path.join(tmp_dir, 'model01.pdb') + model2_filename=os.path.join(tmp_dir, 'model02.pdb') + mat_filename = os.path.join(tmp_dir, "mat.txt") + usalign_path=settings.Locate('USalign', explicit_file_name=usalign) + command = f"{usalign_path} {model1_filename} {model2_filename} -mm 1 -ter 0 -m {mat_filename}" + ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE) + stdout,_=ps.communicate() + lines=stdout.decode().splitlines() + if (len(lines))<22: + _CleanupFiles(tmp_dir) + raise RuntimeError("USalign superposition failed") + with open(mat_filename) as fh: + lines_matrix = fh.readlines() + return _ParseUSAlign(lines,lines_matrix) class TMScoreResult: """ @@ -218,3 +317,31 @@ def TMScore(model1, model2, tmscore=None): model1.handle.EditXCS().ApplyTransform(result.transform) _CleanupFiles(tmp_dir_name) return result + + +def USAlign(model1, model2, usalign=None): + """ + Performs a sequence independent superposition of model1 onto model2, the + reference. Can deal with multimeric complexes and RNA. + + Creates temporary model files on disk and runs USalign with: + ``USalign model1.pdb model2.pdb -mm 1 -ter 0 -m rotmat.txt`` + + :param model1: The model structure. If the superposition is successful, will + be superposed onto the reference structure + :type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle` + :param model2: The reference structure + :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle` + :param usalign: If not None, the path to the USalign executable. Searches + for executable with name ``USalign`` in PATH if not given. + :returns: The result of the superposition + :rtype: :class:`ost.bindings.MMAlignResult` + + :raises: :class:`~ost.settings.FileNotFound` if executable could not be located. + :raises: :class:`RuntimeError` if the superposition failed + """ + tmp_dir_name=_SetupFiles((model1, model2)) + result=_RunUSAlign(usalign, tmp_dir_name) + model1.handle.EditXCS().ApplyTransform(result.transform) + _CleanupFiles(tmp_dir_name) + return result -- GitLab