diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 9e6c10b3c652ab22ad661c16c7ff54728ad3cd45..45543aa00e52f3ad79f2c502e78dcd6bc0cc4093 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -31,6 +31,9 @@ Changes in Release x.x.x nasty loops that interact with the ligand in the model which would not be penalized by classic lDDT! * Remove seq.alg.MATCH and seq.alg.IDENTITY preset substitution matrices + * Enable parasail (https://github.com/jeffdaily/parasail) as drop-in + replacement for naive LocalAlign/GlobalAlign/SemiGlobalAlign implementations. + Must be enabled at compile time - see installation instructions. * Several bug fixes and improvements. Changes in Release 2.7.0 diff --git a/modules/bindings/doc/bindings.rst b/modules/bindings/doc/bindings.rst index 9b2106fc6bc8b170f5acdd349e34e2342bb66aaf..06df24a3074a0b8c602e69763a1f7e68aefc3f24 100644 --- a/modules/bindings/doc/bindings.rst +++ b/modules/bindings/doc/bindings.rst @@ -25,4 +25,3 @@ So far, the binding module includes: lga cadscore dockq - parasail diff --git a/modules/bindings/doc/parasail.rst b/modules/bindings/doc/parasail.rst deleted file mode 100644 index 1750163aa68b0c7b91191d540ad0774b2001851c..0000000000000000000000000000000000000000 --- a/modules/bindings/doc/parasail.rst +++ /dev/null @@ -1,74 +0,0 @@ -Parasail - Pairwise sequence alignments -================================================================================ - -Basic access to pairwise sequence alignment functionality in -`parasail <https://github.com/jeffdaily/parasail/>`_ from -within OpenStructure. - -Citation: - - Jeff Daily. Parasail: SIMD C library for global, semi-global, - and local pairwise sequence alignments. (2016) BMC Bioinformatics - -Parasail support must be enabled at compile time - see installation -instructions. Parasail allows to choose from various strategies but -for the sake of simplicity, this Python binding always calls -``parasail_<mode>_trace_scan_sat`` which seems reasonably fast -across the global, semi-global and local modes. -See parasail documentation for more information. Example usage: - -.. code-block:: python - - from ost import bindings - - print("parasail available?", bindings.ParasailAvailable()) - - s1 = seq.CreateSequence("A", "HEAGAWGHEE") - s2 = seq.CreateSequence("B", "PAWHEA") - - aln = bindings.ParaGlobalAlign(s1, s2, gap_open_penalty=11, - gap_extension_penalty=1, - matrix="blosum62") - - print(aln.ToString()) - - -.. method:: ParasailAvailable() - - returns if OpenStructure has been compiled with parasail support - -.. method:: ParaGlobalAlign(s1, s2, gap_open_penalty=11, \ - gap_extension_penalty=1, matrix="blosum62") - - Calls parasail_nw_trace_scan_sat to perform a Needleman-Wunsch global - alignment. - - :param s1: First sequence - :type s1: :class:`ost.seq.SequenceHandle` - :param s2: Second sequence - :type s2: :class:`ost.seq.SequenceHandle` - :param gap_open_penalty: Gap opening penalty for dynamic programming table - :type gap_open_penalty: :class:`int` - :param gap_extension_penalty: Gap extension penalty for dynamic programming - table - :type gap_extension_penalty: :class:`int` - :param matrix: Substution matrix - Anything that can be resolved by parasail. - E.g. pretty much the full BLOSUM/PAM families of matrices - ("blosum62", "pam100",...) for protein sequences, or - "nuc44"/"dnafull" for nucleotides. - :returns: :class:`ost.seq.AlignmentHandle` - - -.. method:: ParaSemiGlobalAlign(s1, s2, gap_open_penalty=11, \ - gap_extension_penalty=1, matrix="blosum62") - - Same as :meth:`ParaGlobalAlign` but calling parasail_sg_trace_scan_sat to - perform a semi-global alignment. - - -.. method:: ParaLocalAlign(s1, s2, gap_open_penalty=11, \ - gap_extension_penalty=1, matrix="blosum62") - - Same as :meth:`ParaGlobalAlign` but calling parasail_sw_trace_scan_sat to - perform a Smith-Waterman local alignment. The sequences in the returned - alignment might be subsequences of *s1*/*s2* but have offsets set. diff --git a/modules/bindings/pymod/CMakeLists.txt b/modules/bindings/pymod/CMakeLists.txt index afe90d392dae584309fdf8aa70a6d1b167f17c98..5e3f670ad285bd8a1a4910f70fa27323595cd9f6 100644 --- a/modules/bindings/pymod/CMakeLists.txt +++ b/modules/bindings/pymod/CMakeLists.txt @@ -21,7 +21,6 @@ dockq.py set(OST_BINDINGS_PYMOD_SOURCES export_tmalign.cc - export_parasail.cc wrap_bindings.cc ) diff --git a/modules/bindings/pymod/export_parasail.cc b/modules/bindings/pymod/export_parasail.cc deleted file mode 100644 index ea8a987c230f763d57bda2b29dda49cfa56652ed..0000000000000000000000000000000000000000 --- a/modules/bindings/pymod/export_parasail.cc +++ /dev/null @@ -1,42 +0,0 @@ -//------------------------------------------------------------------------------ -// This file is part of the OpenStructure project <www.openstructure.org> -// -// Copyright (C) 2008-2020 by the OpenStructure authors -// -// This library is free software; you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License as published by the Free -// Software Foundation; either version 3.0 of the License, or (at your option) -// any later version. -// This library is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more -// details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with this library; if not, write to the Free Software Foundation, Inc., -// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -//------------------------------------------------------------------------------ - -#include <boost/python.hpp> -#include <ost/bindings/wrap_parasail.hh> -using namespace boost::python; - -void export_parasail() { - - def("ParaLocalAlign", &ost::bindings::ParaLocalAlign, (arg("s1"), arg("s2"), - arg("gap_open_penalty")=11, - arg("gap_extension_penalty")=1, - arg("matrix")="blosum62")); - - def("ParaGlobalAlign", &ost::bindings::ParaGlobalAlign, (arg("s1"), arg("s2"), - arg("gap_open_penalty")=11, - arg("gap_extension_penalty")=1, - arg("matrix")="blosum62")); - - def("ParaSemiGlobalAlign", &ost::bindings::ParaSemiGlobalAlign, (arg("s1"), arg("s2"), - arg("gap_open_penalty")=11, - arg("gap_extension_penalty")=1, - arg("matrix")="blosum62")); - - def("ParasailAvailable", &ost::bindings::ParasailAvailable); -} diff --git a/modules/bindings/pymod/wrap_bindings.cc b/modules/bindings/pymod/wrap_bindings.cc index c41157fc7a718e0778e5c8cab3319179caef51aa..c60893f21d62ca6082c5d57529ef46ccb40a88f3 100644 --- a/modules/bindings/pymod/wrap_bindings.cc +++ b/modules/bindings/pymod/wrap_bindings.cc @@ -24,10 +24,8 @@ using namespace boost::python; void export_TMAlign(); -void export_parasail(); BOOST_PYTHON_MODULE(_ost_bindings) { export_TMAlign(); - export_parasail(); } diff --git a/modules/bindings/src/CMakeLists.txt b/modules/bindings/src/CMakeLists.txt index fa8e8d3961efa374afdf007d3316cb94f7e88683..2ae1b882de59efa1ccb99947d4c7c44c99678098 100644 --- a/modules/bindings/src/CMakeLists.txt +++ b/modules/bindings/src/CMakeLists.txt @@ -5,13 +5,11 @@ if (COMPILE_TMTOOLS) endif() set(OST_BINDINGS_SOURCES -wrap_tmalign.cc -wrap_parasail.cc) +wrap_tmalign.cc) set(OST_BINDINGS_HEADERS -wrap_tmalign.hh -wrap_parasail.hh) +wrap_tmalign.hh) module(NAME bindings SOURCES ${OST_BINDINGS_SOURCES} HEADERS ${OST_BINDINGS_HEADERS} HEADER_OUTPUT_DIR ost/bindings - DEPENDS_ON ost_geom ost_mol ost_seq LINK ${PARASAIL_LIBRARY}) + DEPENDS_ON ost_geom ost_mol ost_seq) diff --git a/modules/bindings/src/wrap_parasail.hh b/modules/bindings/src/wrap_parasail.hh deleted file mode 100644 index 785965467778b3925f20d39982862f27d4322194..0000000000000000000000000000000000000000 --- a/modules/bindings/src/wrap_parasail.hh +++ /dev/null @@ -1,49 +0,0 @@ -//------------------------------------------------------------------------------ -// This file is part of the OpenStructure project <www.openstructure.org> -// -// Copyright (C) 2008-2024 by the OpenStructure authors -// -// This library is free software; you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License as published by the Free -// Software Foundation; either version 3.0 of the License, or (at your option) -// any later version. -// This library is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more -// details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with this library; if not, write to the Free Software Foundation, Inc., -// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -//------------------------------------------------------------------------------ -#ifndef OST_BINDINGS_PARASAIL_H -#define OST_BINDINGS_PARASAIL_H - -#include <ost/seq/sequence_handle.hh> -#include <ost/seq/alignment_handle.hh> - -namespace ost { namespace bindings { - -ost::seq::AlignmentHandle ParaLocalAlign(const ost::seq::SequenceHandle& s1, - const ost::seq::SequenceHandle& s2, - int gap_open_penalty = 11, - int gap_extension_penalty = 1, - const String& matrix="blosum62"); - -ost::seq::AlignmentHandle ParaGlobalAlign(const ost::seq::SequenceHandle& s1, - const ost::seq::SequenceHandle& s2, - int gap_open_penalty = 11, - int gap_extension_penalty = 1, - const String& matrix="blosum62"); - -ost::seq::AlignmentHandle ParaSemiGlobalAlign(const ost::seq::SequenceHandle& s1, - const ost::seq::SequenceHandle& s2, - int gap_open_penalty = 11, - int gap_extension_penalty = 1, - const String& matrix="blosum62"); - -bool ParasailAvailable(); - -}} //ns - -#endif diff --git a/modules/mol/alg/tests/test_qsscore.py b/modules/mol/alg/tests/test_qsscore.py index 8bf7c3e9aa3c28f38887c26dab84bd44ca65b877..a753ee45b5b62e80a9eb57aa50ea86fd5dc85f21 100644 --- a/modules/mol/alg/tests/test_qsscore.py +++ b/modules/mol/alg/tests/test_qsscore.py @@ -222,8 +222,17 @@ class TestQSScore(unittest.TestCase): res = mapper.GetRMSDMapping(ent_2, strategy="greedy_iterative") qs_scorer = QSScorer.FromMappingResult(res) score_result = qs_scorer.Score(res.mapping) - self.assertAlmostEqual(score_result.QS_global, 0.147, 2) - self.assertAlmostEqual(score_result.QS_best, 0.866, 2) + + # The alignments from parasail slightly differ. The sequence identities + # are in the range 40% but slightly lower for parasail alignments. + # however, the parasail alignments appear less gappy and "nicer". + # They nevertheless lead to lower QS-score. + if seq.alg.ParasailAvailable(): + self.assertAlmostEqual(score_result.QS_global, 0.14757304498883386, 2) + self.assertAlmostEqual(score_result.QS_best, 0.7878766697963304, 2) + else: + self.assertAlmostEqual(score_result.QS_global, 0.14797023263299844, 2) + self.assertAlmostEqual(score_result.QS_best, 0.8666616636985371, 2) def test_homo_1_switched_order(self): # different stoichiometry SOD @@ -236,8 +245,16 @@ class TestQSScore(unittest.TestCase): res = mapper.GetRMSDMapping(ent_2, strategy="greedy_iterative") qs_scorer = QSScorer.FromMappingResult(res) score_result = qs_scorer.Score(res.mapping) - self.assertAlmostEqual(score_result.QS_global, 0.147, 2) - self.assertAlmostEqual(score_result.QS_best, 0.866, 2) + # The alignments from parasail slightly differ. The sequence identities + # are in the range 40% but slightly lower for parasail alignments. + # however, the parasail alignments appear less gappy and "nicer". + # They nevertheless lead to lower QS-score. + if seq.alg.ParasailAvailable(): + self.assertAlmostEqual(score_result.QS_global, 0.14757304498883386, 2) + self.assertAlmostEqual(score_result.QS_best, 0.7878766697963304, 2) + else: + self.assertAlmostEqual(score_result.QS_global, 0.14797023263299844, 2) + self.assertAlmostEqual(score_result.QS_best, 0.8666616636985371, 2) def test_homo_2(self): # broken cyclic symmetry diff --git a/modules/seq/alg/doc/seqalg.rst b/modules/seq/alg/doc/seqalg.rst index 31274af12bbbbe32bfb0ca17083f0acf9738b458..a293192f77d188cf4dd56428edaaf08fe2c6a01e 100644 --- a/modules/seq/alg/doc/seqalg.rst +++ b/modules/seq/alg/doc/seqalg.rst @@ -109,6 +109,97 @@ Algorithms for Alignments aligned to only gaps, is considered highly conserved (depending on the number of gap sequences). +.. function:: ShannonEntropy(aln, ignore_gaps=True) + + Returns the per-column Shannon entropies of the alignment. The entropy + describes how conserved a certain column in the alignment is. The higher + the entropy is, the less conserved the column. For a column with no amino + aids, the entropy value is set to NAN. + + :param aln: Multiple sequence alignment + :type aln: :class:`~ost.seq.AlignmentHandle` + :param ignore_gaps: Whether to ignore gaps in the column. + :type ignore_gaps: bool + + :returns: List of column entropies + +.. autofunction:: ost.seq.alg.renumber.Renumber + +.. function:: SequenceIdentity(aln, ref_mode=seq.alg.RefMode.ALIGNMENT, seq_a=0, seq_b=1) + + Calculates the sequence identity between two sequences at index seq_a and seq_b in + a multiple sequence alignment. + + :param aln: multiple sequence alignment + :type aln: :class:`~ost.seq.AlignmentHandle` + :param ref_mode: influences the way the sequence identity is calculated. When + set to `seq.alg.RefMode.LONGER_SEQUENCE`, the sequence identity is + calculated as the number of matches divided by the length of the longer + sequence. If set to `seq.alg.RefMode.ALIGNMENT` (the default), the sequence + identity is calculated as the number of matches divided by the number of + aligned residues (not including the gaps). + :type ref_mode: int + :param seq_a: the index of the first sequence + :type seq_a: int + :param seq_b: the index of the second sequence + :type seq_b: int + :returns: sequence identity in the range 0 to 100. + :rtype: float + +.. function:: SequenceSimilarity(aln, subst_weight, normalize=false, seq_a=0, seq_b=1) + + Calculates the sequence similarity between two sequences at index seq_a and seq_b in + a multiple sequence alignment. + + :param aln: Multiple sequence alignment + :type aln: :class:`~ost.seq.AlignmentHandle` + :param subst_weight: the substitution weight matrix + (see the :ref:`BLOSUM Matrix<blosum>` section below) + :type subst_weight: :class:`~SubstWeightMatrix` + :param normalize: if set to True, normalize to the range of the + substitution weight matrix + :type normalize: bool + :param seq_a: the index of the first sequence + :type seq_a: int + :param seq_b: the index of the second sequence + :type seq_b: int + :returns: sequence similarity + :rtype: float + + +Create pairwise alignments +-------------------------------------------------------------------------------- + +OpenStructure provides naive implementations to create pairwise local, global +and semi-global alignments between two sequences: + +* :func:`LocalAlign` +* :func:`GlobalAlign` +* :func:`SemiGlobalAlign` + +The use of `parasail <https://github.com/jeffdaily/parasail/>`_ as a drop +in replacement is optional and provides significant speedups. +It must be enabled at compile time - see installation instructions. + +Reference: + + Jeff Daily. Parasail: SIMD C library for global, semi-global, + and local pairwise sequence alignments. (2016) BMC Bioinformatics + +Parasail allows to choose from various strategies but for the sake of +simplicity, this Python binding always calls +``parasail_<mode>_trace_scan_sat`` which seems reasonably fast across the +global, semi-global and local modes. See parasail documentation for more +information. + +You can always check if the alignment algorithms use parasail or the naive +implementations by calling: + +.. function:: ParasailAvailable() + + Returns True if OpenStructure has been compiled with parasail support, + False if not. + .. function:: LocalAlign(seq1, seq2, subst_weight, gap_open=-5, gap_ext=-2) Performs a Smith/Waterman local alignment of *seq1* and *seq2* and returns @@ -164,20 +255,6 @@ Algorithms for Alignments :param gap_ext: The gap extension penalty. Must be a negative number :returns: Best-scoring alignment of *seq1* and *seq2*. -.. function:: ShannonEntropy(aln, ignore_gaps=True) - - Returns the per-column Shannon entropies of the alignment. The entropy - describes how conserved a certain column in the alignment is. The higher - the entropy is, the less conserved the column. For a column with no amino - aids, the entropy value is set to NAN. - - :param aln: Multiple sequence alignment - :type aln: :class:`~ost.seq.AlignmentHandle` - :param ignore_gaps: Whether to ignore gaps in the column. - :type ignore_gaps: bool - - :returns: List of column entropies - .. function:: SemiGlobalAlign(seq1, seq2, subst_weight, gap_open=-5, gap_ext=-2) Performs a semi-global alignment of *seq1* and *seq2* and returns the best- @@ -212,49 +289,6 @@ Algorithms for Alignments :param gap_ext: The gap extension penalty. Must be a negative number :returns: best-scoring alignment of *seq1* and *seq2*. -.. autofunction:: ost.seq.alg.renumber.Renumber - -.. function:: SequenceIdentity(aln, ref_mode=seq.alg.RefMode.ALIGNMENT, seq_a=0, seq_b=1) - - Calculates the sequence identity between two sequences at index seq_a and seq_b in - a multiple sequence alignment. - - :param aln: multiple sequence alignment - :type aln: :class:`~ost.seq.AlignmentHandle` - :param ref_mode: influences the way the sequence identity is calculated. When - set to `seq.alg.RefMode.LONGER_SEQUENCE`, the sequence identity is - calculated as the number of matches divided by the length of the longer - sequence. If set to `seq.alg.RefMode.ALIGNMENT` (the default), the sequence - identity is calculated as the number of matches divided by the number of - aligned residues (not including the gaps). - :type ref_mode: int - :param seq_a: the index of the first sequence - :type seq_a: int - :param seq_b: the index of the second sequence - :type seq_b: int - :returns: sequence identity in the range 0 to 100. - :rtype: float - -.. function:: SequenceSimilarity(aln, subst_weight, normalize=false, seq_a=0, seq_b=1) - - Calculates the sequence similarity between two sequences at index seq_a and seq_b in - a multiple sequence alignment. - - :param aln: Multiple sequence alignment - :type aln: :class:`~ost.seq.AlignmentHandle` - :param subst_weight: the substitution weight matrix - (see the :ref:`BLOSUM Matrix<blosum>` section below) - :type subst_weight: :class:`~SubstWeightMatrix` - :param normalize: if set to True, normalize to the range of the - substitution weight matrix - :type normalize: bool - :param seq_a: the index of the first sequence - :type seq_a: int - :param seq_b: the index of the second sequence - :type seq_b: int - :returns: sequence similarity - :rtype: float - .. _substitution-weight-matrices: diff --git a/modules/seq/alg/pymod/wrap_seq_alg.cc b/modules/seq/alg/pymod/wrap_seq_alg.cc index cc57693e8899ce11547a0cec8366133408670b29..d4ae2bd3c77dc1659bdd6f15211cd64aca9a8e83 100644 --- a/modules/seq/alg/pymod/wrap_seq_alg.cc +++ b/modules/seq/alg/pymod/wrap_seq_alg.cc @@ -37,6 +37,7 @@ #include <ost/seq/alg/variance_map.hh> #include <ost/seq/alg/hmm_pseudo_counts.hh> #include <ost/seq/alg/hmm_score.hh> +#include <ost/seq/alg/wrap_parasail.hh> #include <algorithm> @@ -167,12 +168,26 @@ void export_aln_alg() def("MergePairwiseAlignments", &MergePairwiseAlignments); def("Conservation", &Conservation, (arg("assign")=true, arg("prop_name")="cons", arg("ignore_gap")=false)); + +// bend alignment functions around to parasail if available + def("ParasailAvailable", &ParasailAvailable); +#if OST_PARASAIL_ENABLED + def("LocalAlign", &ParaLocalAlign, (arg("seq1"), arg("seq2"),arg("subst_weight"), + arg("gap_open")=-5, arg("gap_ext")=-2)); + def("GlobalAlign", &ParaGlobalAlign,(arg("seq1"),arg("seq2"),arg("subst_weight"), + arg("gap_open")=-5, arg("gap_ext")=-2)); + def("SemiGlobalAlign", &ParaSemiGlobalAlign,(arg("seq1"),arg("seq2"),arg("subst_weight"), + arg("gap_open")=-5, arg("gap_ext")=-2)); +#else def("LocalAlign", &LocalAlign, (arg("seq1"), arg("seq2"),arg("subst_weight"), arg("gap_open")=-5, arg("gap_ext")=-2)); def("GlobalAlign", &GlobalAlign,(arg("seq1"),arg("seq2"),arg("subst_weight"), arg("gap_open")=-5, arg("gap_ext")=-2)); def("SemiGlobalAlign", &SemiGlobalAlign,(arg("seq1"),arg("seq2"),arg("subst_weight"), arg("gap_open")=-5, arg("gap_ext")=-2)); +#endif + + def("ShannonEntropy", &ShannonEntropy, (arg("aln"), arg("ignore_gaps")=true)); } diff --git a/modules/seq/alg/src/CMakeLists.txt b/modules/seq/alg/src/CMakeLists.txt index f97ba489ec77b6d002c5550c29d489b83b68c5e0..7f7441970cbef610f38ec75264c852cd4227e563 100644 --- a/modules/seq/alg/src/CMakeLists.txt +++ b/modules/seq/alg/src/CMakeLists.txt @@ -21,6 +21,7 @@ data/default_contact_weight_matrix.hh data/default_pair_subst_weight_matrix.hh hmm_pseudo_counts.hh hmm_score.hh +wrap_parasail.hh ) set(OST_SEQ_ALG_SOURCES @@ -42,7 +43,8 @@ subst_weight_matrix.cc variance_map.cc hmm_pseudo_counts.cc hmm_score.cc +wrap_parasail.cc ) module(NAME seq_alg HEADER_OUTPUT_DIR ost/seq/alg SOURCES ${OST_SEQ_ALG_SOURCES} - HEADERS ${OST_SEQ_ALG_HEADERS} DEPENDS_ON ost_seq) + HEADERS ${OST_SEQ_ALG_HEADERS} DEPENDS_ON ost_seq LINK ${PARASAIL_LIBRARY}) diff --git a/modules/bindings/src/wrap_parasail.cc b/modules/seq/alg/src/wrap_parasail.cc similarity index 59% rename from modules/bindings/src/wrap_parasail.cc rename to modules/seq/alg/src/wrap_parasail.cc index 34e9fe1ec8ba7fc348feb698f93c0405d811a337..bede42a34698d6c8bd72281700e6e5afd265aa04 100644 --- a/modules/bindings/src/wrap_parasail.cc +++ b/modules/seq/alg/src/wrap_parasail.cc @@ -17,7 +17,7 @@ // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA //------------------------------------------------------------------------------ -#include <ost/bindings/wrap_parasail.hh> +#include <ost/seq/alg/wrap_parasail.hh> #if OST_PARASAIL_ENABLED @@ -29,12 +29,19 @@ typedef parasail_result_t* (*parasail_aln_fn)(const char*, int, const char*, namespace{ -ost::seq::AlignmentHandle Align(const ost::seq::SequenceHandle& s1, - const ost::seq::SequenceHandle& s2, - int gap_open_penalty, int gap_extension_penalty, - const parasail_matrix_t* matrix, - parasail_aln_fn aln_fn, - bool set_offset) { +ost::seq::AlignmentList Align(const ost::seq::ConstSequenceHandle& s1, + const ost::seq::ConstSequenceHandle& s2, + int gap_open_penalty, int gap_extension_penalty, + ost::seq::alg::SubstWeightMatrixPtr& subst_matrix, + parasail_aln_fn aln_fn, + bool set_offset) { + + // original ost provides penalties as negative numbers... + gap_open_penalty = std::abs(gap_open_penalty); + gap_extension_penalty = std::abs(gap_extension_penalty); + + const parasail_matrix_t* matrix = + parasail_matrix_lookup(subst_matrix->GetName().c_str()); parasail_result_t *r = NULL; parasail_traceback_t *traceback_r = NULL; @@ -108,80 +115,80 @@ ost::seq::AlignmentHandle Align(const ost::seq::SequenceHandle& s1, } aln.AddSequence(aln_s1); aln.AddSequence(aln_s2); + + if(s1.HasAttachedView()) { + aln.AttachView(0, s1.GetAttachedView()); + } + + if(s2.HasAttachedView()) { + aln.AttachView(1, s2.GetAttachedView()); + } parasail_result_free(r); parasail_traceback_free(traceback_r); - return aln; + ost::seq::AlignmentList aln_list = {aln}; + return aln_list; } } -namespace ost{ namespace bindings{ +namespace ost{ namespace seq{ namespace alg{ -ost::seq::AlignmentHandle ParaLocalAlign(const ost::seq::SequenceHandle& s1, - const ost::seq::SequenceHandle& s2, - int gap_open_penalty, - int gap_extension_penalty, - const String& matrix) { - const parasail_matrix_t* m = parasail_matrix_lookup(matrix.c_str()); - return Align(s1, s2, gap_open_penalty, gap_extension_penalty, - m, parasail_sw_trace_scan_sat, true); +ost::seq::AlignmentList ParaLocalAlign(const ost::seq::ConstSequenceHandle& s1, + const ost::seq::ConstSequenceHandle& s2, + ost::seq::alg::SubstWeightMatrixPtr& subst, + int gap_open, int gap_ext) { + return Align(s1, s2, gap_open, gap_ext, subst, + parasail_sw_trace_scan_sat, true); } -ost::seq::AlignmentHandle ParaGlobalAlign(const ost::seq::SequenceHandle& s1, - const ost::seq::SequenceHandle& s2, - int gap_open_penalty, - int gap_extension_penalty, - const String& matrix) { - const parasail_matrix_t* m = parasail_matrix_lookup(matrix.c_str()); - return Align(s1, s2, gap_open_penalty, gap_extension_penalty, - m, parasail_nw_trace_scan_sat, false); +ost::seq::AlignmentList ParaGlobalAlign(const ost::seq::ConstSequenceHandle& s1, + const ost::seq::ConstSequenceHandle& s2, + ost::seq::alg::SubstWeightMatrixPtr& subst, + int gap_open, int gap_ext) { + return Align(s1, s2, gap_open, gap_ext, subst, + parasail_nw_trace_scan_sat, false); } -ost::seq::AlignmentHandle ParaSemiGlobalAlign(const ost::seq::SequenceHandle& s1, - const ost::seq::SequenceHandle& s2, - int gap_open_penalty, - int gap_extension_penalty, - const String& matrix) { - const parasail_matrix_t* m = parasail_matrix_lookup(matrix.c_str()); - return Align(s1, s2, gap_open_penalty, gap_extension_penalty, - m, parasail_sg_trace_scan_sat, false); +ost::seq::AlignmentList ParaSemiGlobalAlign(const ost::seq::ConstSequenceHandle& s1, + const ost::seq::ConstSequenceHandle& s2, + ost::seq::alg::SubstWeightMatrixPtr& subst, + int gap_open, int gap_ext) { + return Align(s1, s2, gap_open, gap_ext, subst, + parasail_sg_trace_scan_sat, false); } bool ParasailAvailable() { return true; } -}} // ns +}}} // ns #else -namespace ost{ namespace bindings{ +namespace ost{ namespace seq{ namespace alg{ -ost::seq::AlignmentHandle ParaLocalAlign(const ost::seq::SequenceHandle& s1, - const ost::seq::SequenceHandle& s2, - int gap_open_penalty, - int gap_extension_penalty, - const String& matrix) { +ost::seq::AlignmentList ParaLocalAlign(const ost::seq::ConstSequenceHandle& s1, + const ost::seq::ConstSequenceHandle& s2, + ost::seq::alg::SubstWeightMatrixPtr& subst, + int gap_open, int gap_ext) { throw ost::Error("Must enable Parasail when compiling OpenStructure to use " "ParaLocalAlign"); } -ost::seq::AlignmentHandle ParaGlobalAlign(const ost::seq::SequenceHandle& s1, - const ost::seq::SequenceHandle& s2, - int gap_open_penalty, - int gap_extension_penalty, - const String& matrix) { +ost::seq::AlignmentList ParaGlobalAlign(const ost::seq::ConstSequenceHandle& s1, + const ost::seq::ConstSequenceHandle& s2, + ost::seq::alg::SubstWeightMatrixPtr& subst, + int gap_open, int gap_ext) { throw ost::Error("Must enable Parasail when compiling OpenStructure to use " "ParaGlobalAlign"); } -ost::seq::AlignmentHandle ParaSemiGlobalAlign(const ost::seq::SequenceHandle& s1, - const ost::seq::SequenceHandle& s2, - int gap_open_penalty, - int gap_extension_penalty, - const String& matrix) { +ost::seq::AlignmentList ParaSemiGlobalAlign(const ost::seq::ConstSequenceHandle& s1, + const ost::seq::ConstSequenceHandle& s2, + ost::seq::alg::SubstWeightMatrixPtr& subst, + int gap_open, int gap_ext) { throw ost::Error("Must enable Parasail when compiling OpenStructure to use " "ParaSemiGlobalAlign"); } @@ -190,6 +197,6 @@ bool ParasailAvailable() { return false; } -}} // ns +}}} // ns #endif diff --git a/modules/seq/alg/src/wrap_parasail.hh b/modules/seq/alg/src/wrap_parasail.hh new file mode 100644 index 0000000000000000000000000000000000000000..1a366ad9274b7636e9841f8569ff9614676ebf32 --- /dev/null +++ b/modules/seq/alg/src/wrap_parasail.hh @@ -0,0 +1,53 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2024 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_BINDINGS_PARASAIL_H +#define OST_BINDINGS_PARASAIL_H + +#include <ost/seq/sequence_handle.hh> +#include <ost/seq/alignment_handle.hh> +#include <ost/seq/alg/subst_weight_matrix.hh> + +namespace ost { namespace seq { namespace alg { + +// The following function declarations are intended to mimic the already +// existing function for LocalAligm/GlobalAlign/SemiGlobalAlign +// One example of weirdness is passing the ost::seq::SubstWeightMatrix which +// won't be used in the end. Parasail comes with its own set of matrices +// and the requested one is identified using subst.GetName(). So there is +// plenty of room for optimizations... +ost::seq::AlignmentList ParaLocalAlign(const ost::seq::ConstSequenceHandle& s1, + const ost::seq::ConstSequenceHandle& s2, + ost::seq::alg::SubstWeightMatrixPtr& subst, + int gap_open = -5, int gap_ext = -2); + +ost::seq::AlignmentList ParaGlobalAlign(const ost::seq::ConstSequenceHandle& s1, + const ost::seq::ConstSequenceHandle& s2, + ost::seq::alg::SubstWeightMatrixPtr& subst, + int gap_open = -5, int gap_ext = -2); + +ost::seq::AlignmentList ParaSemiGlobalAlign(const ost::seq::ConstSequenceHandle& s1, + const ost::seq::ConstSequenceHandle& s2, + ost::seq::alg::SubstWeightMatrixPtr& subst, + int gap_open = -5, int gap_ext = -2); + +bool ParasailAvailable(); + +}}} //ns + +#endif