diff --git a/CMakeLists.txt b/CMakeLists.txt index 552aae0c891874ff9771e522e0df7c11899fcc84..c5a7f82a84892190623d18f200922d01fa694ae6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,6 +46,7 @@ option(ENABLE_STATIC "whether static libraries should be compiled" OFF) option(UBUNTU_LAYOUT "whether Debian/Ubuntu's lib and libexec directory layout should be used" OFF) option(HIDDEN_VISIBILITY "on gcc, use -fvisibility=hidden" OFF) option(ENABLE_MM "whether openmm support is added" OFF) +option(ENABLE_PARASAIL, "whether parasail alignment library should be included" OFF) if (CXX) set(CMAKE_CXX_COMPILER ${CXX}) @@ -130,6 +131,11 @@ if(ENABLE_MM) else() set(_OPENMM OFF) endif() +if(ENABLE_PARASAIL) + set(_PARASAIL ON) +else() + set(_PARASAIL OFF) +endif() if (UBUNTU_LAYOUT) @@ -228,6 +234,10 @@ else(ENABLE_MM) set(_OPENMM_PLUGINS "NONE") endif(ENABLE_MM) +if(ENABLE_PARASAIL) + find_package(Parasail REQUIRED) +endif(ENABLE_PARASAIL) + if (ENABLE_STATIC) set(Boost_LIBRARIES) set(Boost_USE_STATIC_LIBS ON) @@ -290,7 +300,7 @@ include_directories(${Boost_INCLUDE_DIRS} ${PNG_INCLUDE_DIRS} ${OPEN_MM_INCLUDE_DIRS} ${SQLITE3_INCLUDE_DIRS} - ) + ${PARASAIL_INCLUDE_DIR}) if (UNIX) SET(CMAKE_SKIP_BUILD_RPATH FALSE) @@ -342,6 +352,7 @@ message(STATUS " SpaceNav Device support (-DENABLE_SPNAV) : ${_SPNAV}\n" " OpenMM support (-DENABLE_MM) : ${_OPENMM}\n" " OpenMM plugins (-DOPEN_MM_PLUGIN_DIR) : ${_OPENMM_PLUGINS}\n" + " Parasail alignment library (-DENABLE_PARASAIL) : ${_PARASAIL}\n" " Optimize (-DOPTIMIZE) : ${_OPT}\n" " Profiling support (-DPROFILE) : ${_PROFILE}\n" " Double Precision (-DUSE_DOUBLE_PRECISION) : ${_DOUBLE_PREC}\n" diff --git a/cmake_support/FindParasail.cmake b/cmake_support/FindParasail.cmake new file mode 100644 index 0000000000000000000000000000000000000000..40db27a15956429f31a3fce5508f433a91f7ffe9 --- /dev/null +++ b/cmake_support/FindParasail.cmake @@ -0,0 +1,11 @@ +# Try to find Parasail +# +# Find the native Parasail include and library. Sets: +# +# PARASAIL_INCLUDE_DIR - Parasail include dir. +# PARASAIL_LIBRARY - Parasail library. + +find_path(PARASAIL_INCLUDE_DIR parasail.h HINTS PARASAIL_INCLUDE_DIR REQUIRED) +find_library(PARASAIL_LIBRARY NAMES parasail HINTS PARASAIL_LIBRARY REQUIRED) + +mark_as_advanced (PARASAIL_LIBRARY PARASAIL_INCLUDE_DIR) diff --git a/modules/bindings/doc/bindings.rst b/modules/bindings/doc/bindings.rst index 06df24a3074a0b8c602e69763a1f7e68aefc3f24..9b2106fc6bc8b170f5acdd349e34e2342bb66aaf 100644 --- a/modules/bindings/doc/bindings.rst +++ b/modules/bindings/doc/bindings.rst @@ -25,3 +25,4 @@ So far, the binding module includes: lga cadscore dockq + parasail diff --git a/modules/bindings/doc/parasail.rst b/modules/bindings/doc/parasail.rst new file mode 100644 index 0000000000000000000000000000000000000000..1750163aa68b0c7b91191d540ad0774b2001851c --- /dev/null +++ b/modules/bindings/doc/parasail.rst @@ -0,0 +1,74 @@ +Parasail - Pairwise sequence alignments +================================================================================ + +Basic access to pairwise sequence alignment functionality in +`parasail <https://github.com/jeffdaily/parasail/>`_ from +within OpenStructure. + +Citation: + + Jeff Daily. Parasail: SIMD C library for global, semi-global, + and local pairwise sequence alignments. (2016) BMC Bioinformatics + +Parasail support must be enabled at compile time - see installation +instructions. Parasail allows to choose from various strategies but +for the sake of simplicity, this Python binding always calls +``parasail_<mode>_trace_scan_sat`` which seems reasonably fast +across the global, semi-global and local modes. +See parasail documentation for more information. Example usage: + +.. code-block:: python + + from ost import bindings + + print("parasail available?", bindings.ParasailAvailable()) + + s1 = seq.CreateSequence("A", "HEAGAWGHEE") + s2 = seq.CreateSequence("B", "PAWHEA") + + aln = bindings.ParaGlobalAlign(s1, s2, gap_open_penalty=11, + gap_extension_penalty=1, + matrix="blosum62") + + print(aln.ToString()) + + +.. method:: ParasailAvailable() + + returns if OpenStructure has been compiled with parasail support + +.. method:: ParaGlobalAlign(s1, s2, gap_open_penalty=11, \ + gap_extension_penalty=1, matrix="blosum62") + + Calls parasail_nw_trace_scan_sat to perform a Needleman-Wunsch global + alignment. + + :param s1: First sequence + :type s1: :class:`ost.seq.SequenceHandle` + :param s2: Second sequence + :type s2: :class:`ost.seq.SequenceHandle` + :param gap_open_penalty: Gap opening penalty for dynamic programming table + :type gap_open_penalty: :class:`int` + :param gap_extension_penalty: Gap extension penalty for dynamic programming + table + :type gap_extension_penalty: :class:`int` + :param matrix: Substution matrix - Anything that can be resolved by parasail. + E.g. pretty much the full BLOSUM/PAM families of matrices + ("blosum62", "pam100",...) for protein sequences, or + "nuc44"/"dnafull" for nucleotides. + :returns: :class:`ost.seq.AlignmentHandle` + + +.. method:: ParaSemiGlobalAlign(s1, s2, gap_open_penalty=11, \ + gap_extension_penalty=1, matrix="blosum62") + + Same as :meth:`ParaGlobalAlign` but calling parasail_sg_trace_scan_sat to + perform a semi-global alignment. + + +.. method:: ParaLocalAlign(s1, s2, gap_open_penalty=11, \ + gap_extension_penalty=1, matrix="blosum62") + + Same as :meth:`ParaGlobalAlign` but calling parasail_sw_trace_scan_sat to + perform a Smith-Waterman local alignment. The sequences in the returned + alignment might be subsequences of *s1*/*s2* but have offsets set. diff --git a/modules/bindings/pymod/CMakeLists.txt b/modules/bindings/pymod/CMakeLists.txt index 5e3f670ad285bd8a1a4910f70fa27323595cd9f6..afe90d392dae584309fdf8aa70a6d1b167f17c98 100644 --- a/modules/bindings/pymod/CMakeLists.txt +++ b/modules/bindings/pymod/CMakeLists.txt @@ -21,6 +21,7 @@ dockq.py set(OST_BINDINGS_PYMOD_SOURCES export_tmalign.cc + export_parasail.cc wrap_bindings.cc ) diff --git a/modules/bindings/pymod/export_parasail.cc b/modules/bindings/pymod/export_parasail.cc new file mode 100644 index 0000000000000000000000000000000000000000..ea8a987c230f763d57bda2b29dda49cfa56652ed --- /dev/null +++ b/modules/bindings/pymod/export_parasail.cc @@ -0,0 +1,42 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2020 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +#include <boost/python.hpp> +#include <ost/bindings/wrap_parasail.hh> +using namespace boost::python; + +void export_parasail() { + + def("ParaLocalAlign", &ost::bindings::ParaLocalAlign, (arg("s1"), arg("s2"), + arg("gap_open_penalty")=11, + arg("gap_extension_penalty")=1, + arg("matrix")="blosum62")); + + def("ParaGlobalAlign", &ost::bindings::ParaGlobalAlign, (arg("s1"), arg("s2"), + arg("gap_open_penalty")=11, + arg("gap_extension_penalty")=1, + arg("matrix")="blosum62")); + + def("ParaSemiGlobalAlign", &ost::bindings::ParaSemiGlobalAlign, (arg("s1"), arg("s2"), + arg("gap_open_penalty")=11, + arg("gap_extension_penalty")=1, + arg("matrix")="blosum62")); + + def("ParasailAvailable", &ost::bindings::ParasailAvailable); +} diff --git a/modules/bindings/pymod/wrap_bindings.cc b/modules/bindings/pymod/wrap_bindings.cc index c60893f21d62ca6082c5d57529ef46ccb40a88f3..c41157fc7a718e0778e5c8cab3319179caef51aa 100644 --- a/modules/bindings/pymod/wrap_bindings.cc +++ b/modules/bindings/pymod/wrap_bindings.cc @@ -24,8 +24,10 @@ using namespace boost::python; void export_TMAlign(); +void export_parasail(); BOOST_PYTHON_MODULE(_ost_bindings) { export_TMAlign(); + export_parasail(); } diff --git a/modules/bindings/src/CMakeLists.txt b/modules/bindings/src/CMakeLists.txt index 2ae1b882de59efa1ccb99947d4c7c44c99678098..fa8e8d3961efa374afdf007d3316cb94f7e88683 100644 --- a/modules/bindings/src/CMakeLists.txt +++ b/modules/bindings/src/CMakeLists.txt @@ -5,11 +5,13 @@ if (COMPILE_TMTOOLS) endif() set(OST_BINDINGS_SOURCES -wrap_tmalign.cc) +wrap_tmalign.cc +wrap_parasail.cc) set(OST_BINDINGS_HEADERS -wrap_tmalign.hh) +wrap_tmalign.hh +wrap_parasail.hh) module(NAME bindings SOURCES ${OST_BINDINGS_SOURCES} HEADERS ${OST_BINDINGS_HEADERS} HEADER_OUTPUT_DIR ost/bindings - DEPENDS_ON ost_geom ost_mol ost_seq) + DEPENDS_ON ost_geom ost_mol ost_seq LINK ${PARASAIL_LIBRARY}) diff --git a/modules/bindings/src/wrap_parasail.cc b/modules/bindings/src/wrap_parasail.cc new file mode 100644 index 0000000000000000000000000000000000000000..34e9fe1ec8ba7fc348feb698f93c0405d811a337 --- /dev/null +++ b/modules/bindings/src/wrap_parasail.cc @@ -0,0 +1,195 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2024 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +#include <ost/bindings/wrap_parasail.hh> + +#if OST_PARASAIL_ENABLED + +#include <parasail.h> // include for external parasail + +typedef parasail_result_t* (*parasail_aln_fn)(const char*, int, const char*, + int, int, int, + const parasail_matrix_t *); + +namespace{ + +ost::seq::AlignmentHandle Align(const ost::seq::SequenceHandle& s1, + const ost::seq::SequenceHandle& s2, + int gap_open_penalty, int gap_extension_penalty, + const parasail_matrix_t* matrix, + parasail_aln_fn aln_fn, + bool set_offset) { + + parasail_result_t *r = NULL; + parasail_traceback_t *traceback_r = NULL; + + String s1_str = s1.GetGaplessString(); + String s2_str = s2.GetGaplessString(); + + const char* s1_c_str = s1_str.c_str(); + int s1_len = s1_str.size(); + const char* s2_c_str = s2_str.c_str(); + int s2_len = s2_str.size(); + + r = aln_fn(s1_c_str, s1_len, s2_c_str, s2_len, 11, 1, matrix); + traceback_r = parasail_result_get_traceback(r, s1_c_str, s1_len, + s2_c_str, s2_len, matrix, + '|', ':', '.'); + + ost::seq::AlignmentHandle aln = ost::seq::CreateAlignment(); + ost::seq::SequenceHandle aln_s1 = + ost::seq::CreateSequence(s1.GetName(), String(traceback_r->query)); + + ost::seq::SequenceHandle aln_s2 = + ost::seq::CreateSequence(s2.GetName(), String(traceback_r->ref)); + + if(set_offset) { + + /* + Tried the code below to set offsets but observed wrongly reported + offset which can be triggered from Python: + + from ost import io + from ost import seq + from ost import bindings + + test_seq = seq.CreateSequence("s", "MMDKHKYRVEIQQMMFVSGEINDPPVETTSLIEDIVRGQVIEILLQSNKTAHLRGSRSIL" + "PEDVIFLIRHDKAKVNRLRTYLSWKDLRKNAKDQDASAGVASGTGNPGAGGEDDLKKAGG" + "GEKDEKDGGNMMKVKKSQIKLPWELQFMFNEHPLENNDDNDDMDEDEREANIVTLKRLKM" + "ADDRTRNMTKEEYVHWSDCRQASFTFRKNKRFKDWSGISQLTEGKPHDDVIDILGFLTFE" + "IVCSLTETALKIKQREQVLQTQKDKSQQSSQDNTNFEFASSTLHRKKRLFDGPENVINPL" + "KPRHIEEAWRVLQTIDMRHRALTNFKGGRLSSKPIIM") + + + s = seq.CreateSequence("asdf", "MTKSIYIIIGYMLHDEEFFYFFFISFYTLWIVFFLLHLSFFSTLSFGIFHDFDTDVYIKVGNYIL" + "HFLELSKNSNLLKNSSEMLKHFRLASLMYMYVYTQMICPSLLGIRN") + + + aln = bindings.ParaLocalAlign(test_seq, s) + + print(aln.ToString()) + print(aln.GetSequence(0).offset) + print(aln.GetSequence(1).offset) + + + + + // Not sure if this is the most efficient way to set offsets, + // i.e. the index of start characters which are actually + // present in returned alignments + parasail_cigar_t* cigar = parasail_result_get_cigar(r, s1_c_str, s1_len, + s2_c_str, s2_len, + matrix); + + aln_s1.SetOffset(cigar->beg_query); + aln_s2.SetOffset(cigar->beg_ref); + parasail_cigar_free(cigar); + */ + + // this is far from optimal but doesn't have the issue described above + aln_s1.SetOffset(s1.GetString().find(aln_s1.GetGaplessString())); + aln_s2.SetOffset(s2.GetString().find(aln_s2.GetGaplessString())); + } + aln.AddSequence(aln_s1); + aln.AddSequence(aln_s2); + + parasail_result_free(r); + parasail_traceback_free(traceback_r); + + return aln; +} + +} + +namespace ost{ namespace bindings{ + +ost::seq::AlignmentHandle ParaLocalAlign(const ost::seq::SequenceHandle& s1, + const ost::seq::SequenceHandle& s2, + int gap_open_penalty, + int gap_extension_penalty, + const String& matrix) { + const parasail_matrix_t* m = parasail_matrix_lookup(matrix.c_str()); + return Align(s1, s2, gap_open_penalty, gap_extension_penalty, + m, parasail_sw_trace_scan_sat, true); +} + +ost::seq::AlignmentHandle ParaGlobalAlign(const ost::seq::SequenceHandle& s1, + const ost::seq::SequenceHandle& s2, + int gap_open_penalty, + int gap_extension_penalty, + const String& matrix) { + const parasail_matrix_t* m = parasail_matrix_lookup(matrix.c_str()); + return Align(s1, s2, gap_open_penalty, gap_extension_penalty, + m, parasail_nw_trace_scan_sat, false); +} + +ost::seq::AlignmentHandle ParaSemiGlobalAlign(const ost::seq::SequenceHandle& s1, + const ost::seq::SequenceHandle& s2, + int gap_open_penalty, + int gap_extension_penalty, + const String& matrix) { + const parasail_matrix_t* m = parasail_matrix_lookup(matrix.c_str()); + return Align(s1, s2, gap_open_penalty, gap_extension_penalty, + m, parasail_sg_trace_scan_sat, false); +} + +bool ParasailAvailable() { + return true; +} + +}} // ns + +#else + +namespace ost{ namespace bindings{ + +ost::seq::AlignmentHandle ParaLocalAlign(const ost::seq::SequenceHandle& s1, + const ost::seq::SequenceHandle& s2, + int gap_open_penalty, + int gap_extension_penalty, + const String& matrix) { + throw ost::Error("Must enable Parasail when compiling OpenStructure to use " + "ParaLocalAlign"); +} + +ost::seq::AlignmentHandle ParaGlobalAlign(const ost::seq::SequenceHandle& s1, + const ost::seq::SequenceHandle& s2, + int gap_open_penalty, + int gap_extension_penalty, + const String& matrix) { + throw ost::Error("Must enable Parasail when compiling OpenStructure to use " + "ParaGlobalAlign"); +} + +ost::seq::AlignmentHandle ParaSemiGlobalAlign(const ost::seq::SequenceHandle& s1, + const ost::seq::SequenceHandle& s2, + int gap_open_penalty, + int gap_extension_penalty, + const String& matrix) { + throw ost::Error("Must enable Parasail when compiling OpenStructure to use " + "ParaSemiGlobalAlign"); +} + +bool ParasailAvailable() { + return false; +} + +}} // ns + +#endif diff --git a/modules/bindings/src/wrap_parasail.hh b/modules/bindings/src/wrap_parasail.hh new file mode 100644 index 0000000000000000000000000000000000000000..785965467778b3925f20d39982862f27d4322194 --- /dev/null +++ b/modules/bindings/src/wrap_parasail.hh @@ -0,0 +1,49 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2024 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_BINDINGS_PARASAIL_H +#define OST_BINDINGS_PARASAIL_H + +#include <ost/seq/sequence_handle.hh> +#include <ost/seq/alignment_handle.hh> + +namespace ost { namespace bindings { + +ost::seq::AlignmentHandle ParaLocalAlign(const ost::seq::SequenceHandle& s1, + const ost::seq::SequenceHandle& s2, + int gap_open_penalty = 11, + int gap_extension_penalty = 1, + const String& matrix="blosum62"); + +ost::seq::AlignmentHandle ParaGlobalAlign(const ost::seq::SequenceHandle& s1, + const ost::seq::SequenceHandle& s2, + int gap_open_penalty = 11, + int gap_extension_penalty = 1, + const String& matrix="blosum62"); + +ost::seq::AlignmentHandle ParaSemiGlobalAlign(const ost::seq::SequenceHandle& s1, + const ost::seq::SequenceHandle& s2, + int gap_open_penalty = 11, + int gap_extension_penalty = 1, + const String& matrix="blosum62"); + +bool ParasailAvailable(); + +}} //ns + +#endif diff --git a/modules/config/CMakeLists.txt b/modules/config/CMakeLists.txt index 826de455f14d4ce06edc720108634b9c1b5ab9f4..91d92232034d71c75657957a163111e1c7dcbd8c 100644 --- a/modules/config/CMakeLists.txt +++ b/modules/config/CMakeLists.txt @@ -50,6 +50,11 @@ if (ENABLE_INFO) else() set(info_enabled 0) endif() +if (ENABLE_PARASAIL) + set(parasail_enabled 1) +else() + set(parasail_enabled 0) +endif() set(config_hh_generator "CMake") set(CONFIG_HH_FILE "${CMAKE_CURRENT_SOURCE_DIR}/config.hh") diff --git a/modules/config/config.hh.in b/modules/config/config.hh.in index e48f750d40af524b7d7850773b510e1f9e926935..d84ca3b689e662ae4de739dadf9381b92570928d 100644 --- a/modules/config/config.hh.in +++ b/modules/config/config.hh.in @@ -31,5 +31,6 @@ #define OST_FFT_USE_THREADS @fftw_use_threads@ #define OST_UBUNTU_LAYOUT @ubuntu_layout@ #define OST_INFO_ENABLED @info_enabled@ +#define OST_PARASAIL_ENABLED @parasail_enabled@ #endif diff --git a/modules/doc/install.rst b/modules/doc/install.rst index 1fc703dd90c59bb4d5d338cdf623d030c062c105..42cf59b918bcc27161421c9ff6cbd36bb4407e05 100644 --- a/modules/doc/install.rst +++ b/modules/doc/install.rst @@ -57,6 +57,11 @@ If you would like to use the :mod:`molecular mechanics <ost.mol.mm>` module: * `OpenMM <https://simtk.org/home/openmm>`_ (7.7.0) +If you would like to do pairwise sequence alignments with parasail +in the :mod:`bindings <ost.bindings>` module: + +* `parasail <https://github.com/jeffdaily/parasail/>`_ (2.6.2) + We do not provide backwards compatibility to Python 2.7. The last release supporting Python 2.7 is 1.11.0. @@ -187,6 +192,13 @@ can influence it. * `OPEN_MM_PLUGIN_DIR`: the path for OpenMM plugins * see example below for commonly used paths +* `ENABLE_PARASAIL` controls whether parasail should be enabled. By default, + this is switched off. If it is turned on, you must also set the paths + to your parasail installation: + + * `PARASAIL_INCLUDE_DIR`: the include path containing the file parasail.h + * `PARASAIL_LIBRARY`: the parasail library + * Several paths to other libraries can be set if they are not in the expected locations: