diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 15cd614262cd22d3bb1a40bc3f65348e4862cfba..0c643dc9fa676e51d60b39d815fefb9a97f54a6f 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,3 +1,9 @@ +Changes in Release 1.6 +-------------------------------------------------------------------------------- + + * Added code to compare structures attached to a multiple seq. aln. + * Incorporated Antechamber based force-field parameter generation for mm mod. + Changes in Release 1.5 -------------------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ab2d74cedcb255bf1ae8f0db289c6b3c66ea16f..451dd53b001b234bfad5423be70591187219a6b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.6.4 FATAL_ERROR) project(OpenStructure CXX C) set (CMAKE_EXPORT_COMPILE_COMMANDS 1) set (OST_VERSION_MAJOR 1) -set (OST_VERSION_MINOR 5) +set (OST_VERSION_MINOR 6) set (OST_VERSION_PATCH 0) set (OST_VERSION_STRING ${OST_VERSION_MAJOR}.${OST_VERSION_MINOR}.${OST_VERSION_PATCH} ) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/cmake_support) diff --git a/modules/conop/doc/aminoacid.rst b/modules/conop/doc/aminoacid.rst index 9751631723150af30d849154f03f253ee4ba2c26..7b42f5a81cb222965adede4775de1459ebfcce03 100644 --- a/modules/conop/doc/aminoacid.rst +++ b/modules/conop/doc/aminoacid.rst @@ -20,19 +20,18 @@ following values: Converter functions -------------------------------------------------------------------------------- .. function:: ResidueToAminoAcid(residue) - OneLetterCodeToAminoAcid(olc) + ResidueNameToAminoAcid(rname) + OneLetterCodeNameToAminoAcid(olc) - Get amino acid from residue or one-letter-code. For non-standard amino acids - or one-letter-codes, XXX is returned. + Get amino acid from residue, residue name (three-letter-code) + or one-letter-code. Returns XXX if residue, residue name or + one-letter-code is not one of the 20 standard amino acids. .. function:: OneLetterCodeToResidueName(olc) AminoAcidToResidueName(amino_acid) Get residue name from one-letter-code or amino_acid. For invalid one-letter-codes or XXX, 'UNK' is returned. - - - .. function:: ResidueNameToOneLetterCode(rname) diff --git a/modules/conop/pymod/export_amino_acids.cc b/modules/conop/pymod/export_amino_acids.cc index 923c74555dded939b0526771db983515776873d6..b1422c409a91c05193b2f7ad4474a50170b10d79 100644 --- a/modules/conop/pymod/export_amino_acids.cc +++ b/modules/conop/pymod/export_amino_acids.cc @@ -77,6 +77,7 @@ void export_AminoAcids() ; def("ResidueToAminoAcid",&ResidueToAminoAcid); + def("ResidueNameToAminoAcid",&ResidueNameToAminoAcid); def("AminoAcidToResidueName",&AminoAcidToResidueName); def("OneLetterCodeToResidueName",&OneLetterCodeToResidueName); def("ResidueNameToOneLetterCode",&ResidueNameToOneLetterCode); diff --git a/modules/conop/src/amino_acids.cc b/modules/conop/src/amino_acids.cc index 7665e97b34240a78c886a06896fe48ef00026ed2..2ce42a48510479ffcb5c7cfe1dc0c801a25a5aca 100644 --- a/modules/conop/src/amino_acids.cc +++ b/modules/conop/src/amino_acids.cc @@ -174,70 +174,91 @@ String OneLetterCodeToResidueName(char olc) } } +AminoAcid ResidueNameToAminoAcid(String rn) +{ + std::transform(rn.begin(),rn.end(),rn.begin(),toupper); + static AminoAcidKeys aa_keys = AminoAcidKeys(); + AminoAcid* aa=find(aa_keys, rn.c_str()); + if (aa) + return *aa; + + return XXX; +} + char ResidueNameToOneLetterCode(String rn) { - String upper_rn=rn; + if (rn.empty()) return 'X'; + std::transform(rn.begin(),rn.end(),rn.begin(),toupper); - if (upper_rn == "ALA") { - return 'A'; - } - if (upper_rn == "ARG") { - return 'R'; + + switch(rn[0]){ + case 'A': { + if (rn == "ALA") return 'A'; + if (rn == "ARG") return 'R'; + if (rn == "ASN") return 'N'; + if (rn == "ASP") return 'D'; + break; + } + + case 'C': { + if (rn == "CYS") return 'C'; + break;; + } + + case 'G': { + if (rn == "GLN") return 'Q'; + if (rn == "GLU") return 'E'; + if (rn == "GLY") return 'G'; + break; + } + + case 'H': { + if (rn == "HIS") return 'H'; + break; + } + + case 'I': { + if (rn == "ILE") return 'I'; + break; + } + + case 'L': { + if (rn == "LEU") return 'L'; + if (rn == "LYS") return 'K'; + break; + } + + case 'M': { + if (rn == "MET") return 'M'; + break; + } + + case 'P': { + if (rn == "PRO") return 'P'; + if (rn == "PHE") return 'F'; + break; + } + + case 'S': { + if (rn == "SER") return 'S'; + break; + } + + case 'T': { + if (rn == "TYR") return 'Y'; + if (rn == "TRP") return 'W'; + if (rn == "THR") return 'T'; + break; + } + + case 'V': { + if (rn == "VAL") return 'V'; + break; + } + + default: break; } - if (upper_rn == "ASN") { - return 'N'; - } - if (upper_rn == "ASP") { - return 'D'; - } - if (upper_rn == "GLN") { - return 'Q'; - } - if (upper_rn == "GLU") { - return 'E'; - } - if (upper_rn == "LYS") { - return 'K'; - } - if (upper_rn == "SER") { - return 'S'; - } - if (upper_rn == "CYS") { - return 'C'; - } - if (upper_rn == "TYR") { - return 'Y'; - } - if (upper_rn == "TRP") { - return 'W'; - } - if (upper_rn == "THR") { - return 'T'; - } - if (upper_rn == "VAL") { - return 'V'; - } - if (upper_rn == "ILE") { - return 'I'; - } - if (upper_rn == "MET") { - return 'M'; - } - if (upper_rn == "LEU") { - return 'L'; - } - if (upper_rn == "GLY") { - return 'G'; - } - if (upper_rn == "PRO") { - return 'P'; - } - if (upper_rn == "HIS") { - return 'H'; - } - if (upper_rn == "PHE") { - return 'F'; - } + return 'X'; } diff --git a/modules/conop/src/amino_acids.hh b/modules/conop/src/amino_acids.hh index aeeb6a21de4fd6fe87a847f6bc7d73f8d0f11196..f79bb68ac9d25f5fda38aa8f94168c720ee7ecda 100644 --- a/modules/conop/src/amino_acids.hh +++ b/modules/conop/src/amino_acids.hh @@ -56,7 +56,7 @@ DLLEXPORT_OST_CONOP AminoAcid OneLetterCodeToAminoAcid(char olc); char DLLEXPORT_OST_CONOP ResidueNameToOneLetterCode(String rn); -char DLLEXPORT_OST_CONOP ResidueNameToOneLetterCode(String rn); +AminoAcid DLLEXPORT_OST_CONOP ResidueNameToAminoAcid(String rn); class AminoAcidSetIterator : public std::iterator<std::forward_iterator_tag, AminoAcid> { diff --git a/modules/conop/tests/CMakeLists.txt b/modules/conop/tests/CMakeLists.txt index a0c348ac5fb2337c3ed3a3e521c26e300d87404d..58a605268bc9e6fd78e598a3301f8b6e9be22857 100644 --- a/modules/conop/tests/CMakeLists.txt +++ b/modules/conop/tests/CMakeLists.txt @@ -1,4 +1,5 @@ set(OST_CONOP_UNIT_TESTS + test_amino_acids.cc test_heuristic_conop.cc tests.cc test_rule_based_conop.cc diff --git a/modules/conop/tests/test_amino_acids.cc b/modules/conop/tests/test_amino_acids.cc new file mode 100644 index 0000000000000000000000000000000000000000..29266729c3c24d8e4c1857c1321f6c6a14c79870 --- /dev/null +++ b/modules/conop/tests/test_amino_acids.cc @@ -0,0 +1,251 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2016 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +/* + Author: Gerardo Tauriello + */ + +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> +#include <boost/test/auto_unit_test.hpp> + +#include <ost/conop/amino_acids.hh> + +using namespace ost; +using namespace ost::conop; + +BOOST_AUTO_TEST_SUITE(conop); + +BOOST_AUTO_TEST_CASE(test_amino_acid_to_residue_name) { + BOOST_CHECK_EQUAL(AminoAcidToResidueName(ALA), "ALA"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(ARG), "ARG"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(ASN), "ASN"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(ASP), "ASP"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(CYS), "CYS"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(GLU), "GLU"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(GLN), "GLN"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(GLY), "GLY"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(HIS), "HIS"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(ILE), "ILE"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(LEU), "LEU"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(LYS), "LYS"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(MET), "MET"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(PHE), "PHE"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(PRO), "PRO"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(SER), "SER"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(THR), "THR"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(TRP), "TRP"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(TYR), "TYR"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(VAL), "VAL"); + BOOST_CHECK_EQUAL(AminoAcidToResidueName(XXX), "UNK"); +} + +BOOST_AUTO_TEST_CASE(test_one_letter_code_to_residue_name) { + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('A'), "ALA"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('R'), "ARG"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('N'), "ASN"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('D'), "ASP"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('C'), "CYS"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('E'), "GLU"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('Q'), "GLN"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('G'), "GLY"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('H'), "HIS"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('I'), "ILE"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('L'), "LEU"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('K'), "LYS"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('M'), "MET"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('F'), "PHE"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('P'), "PRO"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('S'), "SER"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('T'), "THR"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('W'), "TRP"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('Y'), "TYR"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('V'), "VAL"); + // should also wiork if not uppercase! + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('a'), "ALA"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('r'), "ARG"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('n'), "ASN"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('d'), "ASP"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('c'), "CYS"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('e'), "GLU"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('q'), "GLN"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('g'), "GLY"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('h'), "HIS"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('i'), "ILE"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('l'), "LEU"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('k'), "LYS"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('m'), "MET"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('f'), "PHE"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('p'), "PRO"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('s'), "SER"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('t'), "THR"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('w'), "TRP"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('y'), "TYR"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('v'), "VAL"); + // unknown ones + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('B'), "UNK"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('O'), "UNK"); + BOOST_CHECK_EQUAL(OneLetterCodeToResidueName('Z'), "UNK"); +} + +BOOST_AUTO_TEST_CASE(test_one_letter_code_to_amino_acid) { + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('A'), ALA); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('R'), ARG); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('N'), ASN); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('D'), ASP); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('C'), CYS); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('E'), GLU); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('Q'), GLN); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('G'), GLY); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('H'), HIS); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('I'), ILE); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('L'), LEU); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('K'), LYS); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('M'), MET); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('F'), PHE); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('P'), PRO); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('S'), SER); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('T'), THR); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('W'), TRP); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('Y'), TYR); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('V'), VAL); + // should also wiork if not uppercase! + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('a'), ALA); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('r'), ARG); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('n'), ASN); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('d'), ASP); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('c'), CYS); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('e'), GLU); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('q'), GLN); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('g'), GLY); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('h'), HIS); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('i'), ILE); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('l'), LEU); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('k'), LYS); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('m'), MET); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('f'), PHE); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('p'), PRO); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('s'), SER); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('t'), THR); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('w'), TRP); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('y'), TYR); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('v'), VAL); + // unknown ones + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('B'), XXX); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('O'), XXX); + BOOST_CHECK_EQUAL(OneLetterCodeToAminoAcid('Z'), XXX); +} + +BOOST_AUTO_TEST_CASE(test_residue_name_to_one_letter_code) { + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("ALA"), 'A'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("ARG"), 'R'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("ASN"), 'N'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("ASP"), 'D'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("CYS"), 'C'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("GLU"), 'E'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("GLN"), 'Q'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("GLY"), 'G'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("HIS"), 'H'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("ILE"), 'I'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("LEU"), 'L'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("LYS"), 'K'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("MET"), 'M'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("PHE"), 'F'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("PRO"), 'P'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("SER"), 'S'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("THR"), 'T'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("TRP"), 'W'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("TYR"), 'Y'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("VAL"), 'V'); + // should also wiork if not uppercase! + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Ala"), 'A'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Arg"), 'R'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Asn"), 'N'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Asp"), 'D'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Cys"), 'C'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Glu"), 'E'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Gln"), 'Q'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Gly"), 'G'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("His"), 'H'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Ile"), 'I'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Leu"), 'L'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Lys"), 'K'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Met"), 'M'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Phe"), 'F'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Pro"), 'P'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Ser"), 'S'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Thr"), 'T'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Trp"), 'W'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Tyr"), 'Y'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("Val"), 'V'); + // unknown ones + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("MUH"), 'X'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("FOO"), 'X'); + BOOST_CHECK_EQUAL(ResidueNameToOneLetterCode("BAR"), 'X'); +} + +BOOST_AUTO_TEST_CASE(test_residue_name_to_amino_acid) { + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("ALA"), ALA); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("ARG"), ARG); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("ASN"), ASN); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("ASP"), ASP); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("CYS"), CYS); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("GLU"), GLU); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("GLN"), GLN); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("GLY"), GLY); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("HIS"), HIS); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("ILE"), ILE); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("LEU"), LEU); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("LYS"), LYS); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("MET"), MET); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("PHE"), PHE); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("PRO"), PRO); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("SER"), SER); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("THR"), THR); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("TRP"), TRP); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("TYR"), TYR); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("VAL"), VAL); + // should also wiork if not uppercase! + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Ala"), ALA); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Arg"), ARG); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Asn"), ASN); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Asp"), ASP); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Cys"), CYS); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Glu"), GLU); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Gln"), GLN); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Gly"), GLY); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("His"), HIS); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Ile"), ILE); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Leu"), LEU); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Lys"), LYS); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Met"), MET); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Phe"), PHE); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Pro"), PRO); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Ser"), SER); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Thr"), THR); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Trp"), TRP); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Tyr"), TYR); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("Val"), VAL); + // unknown ones + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("MUH"), XXX); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("FOO"), XXX); + BOOST_CHECK_EQUAL(ResidueNameToAminoAcid("BAR"), XXX); +} + +BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/geom/src/vecmat3_op.cc b/modules/geom/src/vecmat3_op.cc index 3c5cbdea27e4da7b92180eb558856757aa5f535d..f476eca3098fcde019427da37960a0bf826f0933 100644 --- a/modules/geom/src/vecmat3_op.cc +++ b/modules/geom/src/vecmat3_op.cc @@ -178,20 +178,6 @@ Mat3 AxisRotation(const Vec3& axis, Real angle) xy-ca*xy+sa*z, yy+ca-ca*yy, yz-ca*yz-sa*x, xz-ca*xz-sa*y, yz-ca*yz+sa*x,zz+ca-ca*zz); } - - -Real DihedralAngle(const Vec3& p1, const Vec3& p2, const Vec3& p3, - const Vec3&p4) -{ - Vec3 r1=p2-p1; - Vec3 r2=p3-p2; - Vec3 r3=p4-p3; - Vec3 r12cross = Cross(r1, r2); - Vec3 r23cross = Cross(r2, r3); - return atan2(Dot(r1*Length(r2), r23cross), - Dot(r12cross, r23cross)); -} - Real MinDistance(const Vec3List& l1, const Vec3List& l2) { diff --git a/modules/geom/src/vecmat3_op.hh b/modules/geom/src/vecmat3_op.hh index fbaaec161b20ba7ddc9112c89bda93295a2ec407..d7762cccfe7240647b34049c257152d4ba67e17a 100644 --- a/modules/geom/src/vecmat3_op.hh +++ b/modules/geom/src/vecmat3_op.hh @@ -166,9 +166,16 @@ Mat3 DLLEXPORT_OST_GEOM AxisRotation(const Vec3& axis, Real angle); /// The returned vector is of length 1 Vec3 DLLEXPORT_OST_GEOM OrthogonalVector(const Vec3& axis); - -Real DLLEXPORT_OST_GEOM DihedralAngle(const Vec3& p1, const Vec3& p2, - const Vec3& p3, const Vec3&p4); +/// \brief Get dihedral angle for p1-p2-p3-p4 +inline Real DihedralAngle(const Vec3& p1, const Vec3& p2, + const Vec3& p3, const Vec3& p4) { + const Vec3 r1 = p2-p1; + const Vec3 r2 = p3-p2; + const Vec3 r3 = p4-p3; + const Vec3 r12cross = Cross(r1, r2); + const Vec3 r23cross = Cross(r2, r3); + return std::atan2(Dot(r1*Length(r2), r23cross), Dot(r12cross, r23cross)); +} //! returns std::min of each component inline Vec3 Min(const Vec3& v1, const Vec3& v2) diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index 9d6536d9efd7ace2d39917cb26d507ae5c344419..302b8fb4f86949de8ab7f5ec63b7e2ee9bb58ae7 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -256,7 +256,7 @@ def LoadCHARMMTraj(crd, dcd_file=None, profile='CHARMM', raise ValueError("No DCD filename given") return LoadCHARMMTraj_(crd, dcd_file, stride, lazy_load, detect_swap, swap_bytes) -def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=None, profile='DEFAULT', remote=False, seqres=False, info=False): +def LoadMMCIF(filename, fault_tolerant=None, calpha_only=None, profile='DEFAULT', remote=False, seqres=False, info=False): """ Load MMCIF file from disk and return one or more entities. Several options allow to customize the exact behaviour of the MMCIF import. For more @@ -264,9 +264,6 @@ def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=Non Residues are flagged as ligand if they are mentioned in a HET record. - :param restrict_chains: If not an empty string, only chains listed in the - string will be imported. - :param fault_tolerant: Enable/disable fault-tolerant import. If set, overrides the value of :attr:`IOProfile.fault_tolerant`. @@ -300,13 +297,18 @@ def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=Non if remote: from ost.io.remote import RemoteGet - tmp_file =RemoteGet(filename, from_repo='cif') + tmp_file = RemoteGet(filename, from_repo='cif') filename = tmp_file.name try: ent = mol.CreateEntity() reader = MMCifReader(filename, ent, prof) reader.read_seqres = seqres + + # NOTE: to speed up things, we could introduce a restrict_chains parameter + # similar to the one in LoadPDB. Here, it would have to be a list/set + # of chain-name-strings. + #if reader.HasNext(): reader.Parse() if prof.processor: diff --git a/modules/io/pymod/wrap_io.cc b/modules/io/pymod/wrap_io.cc index 6d16af3e91192a6b47c39a3f4d770e2be94cd260..302720245b63d534d88aceb2bba9c4d64bba0464 100644 --- a/modules/io/pymod/wrap_io.cc +++ b/modules/io/pymod/wrap_io.cc @@ -83,7 +83,8 @@ BOOST_PYTHON_MODULE(_ost_io) def("Instance",IOManager::Instance, return_value_policy<reference_existing_object>() ); - def("LoadEntity",LoadEntity,load_entity_ov()); + def("LoadEntity", LoadEntity, + load_entity_ov(args("filename", "format"))); def("SaveEntity", &save_ent_view, save_entity_view_ov(args("entity", "filename", "format"))); def("SaveEntity", &save_ent_handle, diff --git a/modules/io/src/mol/dcd_io.cc b/modules/io/src/mol/dcd_io.cc index f21b67afc6019bba35f68142d9fd2da8d1fc5802..e81d3135144fd4c9aef7439b0f482726a72c8cd1 100644 --- a/modules/io/src/mol/dcd_io.cc +++ b/modules/io/src/mol/dcd_io.cc @@ -279,7 +279,7 @@ mol::CoordGroupHandle load_dcd(const mol::AtomHandleList& alist, // this atom li geom::Vec3 cell_size, cell_angles; size_t frame_size=calc_frame_size(ucell_flag, gap_flag, xlist.size()); int i=0; - for(;i<header.num;i+=stride) { + while(true){ if (!read_frame(istream, header, frame_size, ucell_flag, gap_flag, swap_flag, xlist, clist, i,cell_size,cell_angles)) { break; @@ -292,11 +292,12 @@ mol::CoordGroupHandle load_dcd(const mol::AtomHandleList& alist, // this atom li // skip frames (defined by stride) if(stride>1) istream.seekg(frame_size*(stride-1),std::ios_base::cur); - } - istream.get(); - if(!istream.eof()) { - LOG_VERBOSE("LoadCHARMMTraj: unexpected trailing file data, bytes read: " - << istream.tellg()); + i+=stride; + istream.get(); + if(istream.eof()){ + break; + } + istream.unget(); } LOG_VERBOSE("Loaded " << cg.GetFrameCount() << " frames with " diff --git a/modules/io/src/mol/load_entity.cc b/modules/io/src/mol/load_entity.cc index 98eea40f861a728340b8ad97b79997800bbd6119..4ce5f94b7e7a2aad92bef022b44463881242641a 100644 --- a/modules/io/src/mol/load_entity.cc +++ b/modules/io/src/mol/load_entity.cc @@ -30,13 +30,13 @@ namespace ost { namespace io { namespace { -void Import(mol::EntityHandle& eh, const String& filename, int flag) +void Import(mol::EntityHandle& eh, const String& filename, const String& format) { Profile profile_import("import"); LOG_DEBUG("creating EntityIOHandle for " << filename); - EntityIOHandlerP ent_io = IOManager::Instance().FindEntityImportHandler(filename); + EntityIOHandlerP ent_io = IOManager::Instance().FindEntityImportHandler(filename, format); - // TODO: proper error handling + // note: error handling done in FindEntityImportHandler LOG_DEBUG("calling import on entity io handle"); ent_io->Import(eh,filename); @@ -53,12 +53,12 @@ void Import(mol::EntityHandle& eh, const String& filename, int flag) } // anon ns -mol::EntityHandle LoadEntity(const String& filename, int flag) +mol::EntityHandle LoadEntity(const String& filename, const String& format) { LOG_DEBUG("creating emtpy entity"); mol::EntityHandle eh=mol::CreateEntity(); mol::XCSEditor xcs_lock=eh.EditXCS(mol::BUFFERED_EDIT); - Import(eh,filename,flag); + Import(eh, filename, format); return eh; } diff --git a/modules/io/src/mol/load_entity.hh b/modules/io/src/mol/load_entity.hh index 0a38c07afbfeaac506badf1445606a101d670a70..80ff0bb492923ed94de7db92b7766bccc37efa8c 100644 --- a/modules/io/src/mol/load_entity.hh +++ b/modules/io/src/mol/load_entity.hh @@ -25,7 +25,7 @@ namespace ost { namespace io { // load unmanaged entity -DLLEXPORT_OST_IO mol::EntityHandle LoadEntity(const String& filename, int flag=0); +DLLEXPORT_OST_IO mol::EntityHandle LoadEntity(const String& filename, const String& format="auto"); }} // ns diff --git a/modules/mol/alg/src/svd_superpose.cc b/modules/mol/alg/src/svd_superpose.cc index 89d6fcfdb50ef748af8c97c5a7a5041359efc8f7..59adf6141d3fcab79dd11753127ea49833c101e7 100644 --- a/modules/mol/alg/src/svd_superpose.cc +++ b/modules/mol/alg/src/svd_superpose.cc @@ -434,7 +434,7 @@ SuperpositionResult MeanSquareMinimizer::IterativeMinimize(int ncycles, Real dis SuperpositionResult SuperposeAtoms(const mol::AtomViewList& atoms1, const mol::AtomViewList& atoms2, - bool apply_transform=true) + bool apply_transform) { MeanSquareMinimizer msm = MeanSquareMinimizer::FromAtomLists(atoms1, atoms2); SuperpositionResult result = msm.MinimizeOnce(); @@ -454,7 +454,7 @@ SuperpositionResult SuperposeAtoms(const mol::AtomViewList& atoms1, SuperpositionResult SuperposeSVD(const mol::EntityView& ev1, const mol::EntityView& ev2, - bool apply_transform=true) { + bool apply_transform) { AtomViewList atoms1 = ev1.GetAtomList(); AtomViewList atoms2 = ev2.GetAtomList(); diff --git a/modules/mol/base/pymod/export_surface.cc b/modules/mol/base/pymod/export_surface.cc index f569d3e403ba59adf4dd362ebcaf39abcd3b792a..c6defa5de52452cf30b8c1402fd444582459fb82 100644 --- a/modules/mol/base/pymod/export_surface.cc +++ b/modules/mol/base/pymod/export_surface.cc @@ -64,8 +64,10 @@ void export_Surface() .def("Attach",attach1) .def("Attach",attach2) .def("GetVertexIDList", &SurfaceHandle::GetVertexIDList) - .def("GetTriIDList", &SurfaceHandle::GetTriIDList) + .def("GetTriIDList", &SurfaceHandle::GetTriIDList) + .def("AddVertex", &SurfaceHandle::AddVertex) .def("GetVertex", &SurfaceHandle::GetVertex) + .def("AddTri", &SurfaceHandle::AddTri) .def("GetTri", &SurfaceHandle::GetTri) .def("FindWithin", &SurfaceHandle::FindWithin) .def("Invert",&SurfaceHandle::Invert) diff --git a/modules/mol/mm/doc/buildingblock.rst b/modules/mol/mm/doc/buildingblock.rst index c9dbcfd7d936986f5ed18ffa1a3905e852fe6918..1e9f7bf02b6f08eb7a3ab4f82cf6d2eda24eb74e 100644 --- a/modules/mol/mm/doc/buildingblock.rst +++ b/modules/mol/mm/doc/buildingblock.rst @@ -1,7 +1,7 @@ Blocks ================================================================================ -.. currentmodule:: ost.mol +.. currentmodule:: ost.mol.mm The most basic type of residue description is the BuildingBlock. It contains information on atom names and their corresponding types, charges and diff --git a/modules/mol/mm/doc/forcefield.rst b/modules/mol/mm/doc/forcefield.rst index 9c5754152381c0712e176100ffe0d42a8cbb4f3b..444ff0243e473a3c2d6ab82cd2ec7b4eed37aecf 100644 --- a/modules/mol/mm/doc/forcefield.rst +++ b/modules/mol/mm/doc/forcefield.rst @@ -1,7 +1,7 @@ Forcefields ================================================================================ -.. currentmodule:: ost.mol +.. currentmodule:: ost.mol.mm The forcefields are a dump for interactions with their parameters, but also for atom specific information or residue definitions in the form of a @@ -36,26 +36,26 @@ Loading the standard forcefields provided by OpenStructure Reading forcefields -------------------------------------------------------------------------------- -The :class:`FFReader` object is rather experimental. It has nevertheless been -thoroughly tested for loading the CHARMM and AMBER forcefields in the -Gromacs format. The reader is capable of resolving the preprocessor statements -as they are used in Gromacs. - .. class:: FFReader(base_dir) - :param base_dir: Base path of the reader. - All loaded files must be defined relative to this base - path. - - :type base_dir: :class:`str` - The :class:`FFReader` builds up a :class:`Forcefield`, that gets updated with every call to the read functions. If the read files contain preprocessor statements as they are used in Gromacs, they will be applied to all subsequent lines read in. Parsed preprocessor statements are: #include, #define, #ifdef, #ifndef, #else and #endif + Note that this class is rather experimental. It has nevertheless been + thoroughly tested for loading the CHARMM and AMBER forcefields in the + Gromacs format. The reader is capable of resolving the preprocessor statements + as they are used in Gromacs. + + :param base_dir: Base path of the reader. + All loaded files must be defined relative to this base + path. + + :type base_dir: :class:`str` + .. method:: ReadGromacsForcefield() Searches and reads the forcefield.itp and atomtypes.atp files @@ -100,8 +100,6 @@ as they are used in Gromacs. :returns: The reader internal :class:`Forcefield` - - .. code-block:: python path = "path_to_gromacs/share/top/charmm27.ff" @@ -111,9 +109,9 @@ as they are used in Gromacs. reader.ReadGromacsForcefield() #we also want to read several residue databases - reader.Read("aminoacids") - reader.Read("rna") - reader.Read("dna") + reader.ReadResidueDatabase("aminoacids") + reader.ReadResidueDatabase("rna") + reader.ReadResidueDatabase("dna") #ions and water are also nice to have, they're stored in itp files reader.ReadITP("tip3p") @@ -138,9 +136,42 @@ as they are used in Gromacs. ff = new_reader.GetForcefield() ff.Save("charmm_forcefield.dat") +Generating forcefields with Antechamber +-------------------------------------------------------------------------------- + +The antechamber submodule of mol.mm defines functions to use Antechamber (from +AmberTools) to automatically generate force field parameters and load the +results into :class:`~ost.mol.mm.Forcefield` objects. + +**Example usage**: + +.. code-block:: python + + from ost.mol import mm + + # create parameters for RVP using PDB's component dictionary + mm.antechamber.RunAntechamber('RVP', 'components.cif', base_out_dir='ligands') + + # create force field + ff = mm.Forcefield() + ff = mm.antechamber.AddFromPath(ff, 'ligands/RVP') + # equivalent: ff = mm.antechamber.AddFromFiles(ff, 'ligands/RVP/frcmod', + # 'ligands/RVP/out.mpdb') + # since Antechamber cannot deal with ions, you can do it manually + ff = mm.antechamber.AddIon(ff, 'CL', 'CL', 35.45, -1.0, 0.4401, 0.4184) + # save it + ff.Save('ligands/ff.dat') + +**Functions**: + +.. automodule:: ost.mol.mm.antechamber + :members: + The Forcefield Class -------------------------------------------------------------------------------- +.. currentmodule:: ost.mol.mm + .. class:: Forcefield @@ -154,7 +185,7 @@ The Forcefield Class - .. method:: Load(filename) + .. staticmethod:: Load(filename) reads in binary forcefield file diff --git a/modules/mol/mm/doc/integrators.rst b/modules/mol/mm/doc/integrators.rst index ed94d86955d0046806e872480146bf05a38393d3..5c4d3524174b2439d110c14f262af8a5b8d196d9 100644 --- a/modules/mol/mm/doc/integrators.rst +++ b/modules/mol/mm/doc/integrators.rst @@ -1,7 +1,7 @@ Integrators ================================================================================ -.. currentmodule:: ost.mol +.. currentmodule:: ost.mol.mm .. class:: Integrator diff --git a/modules/mol/mm/doc/interaction.rst b/modules/mol/mm/doc/interaction.rst index 95d193e1a9e64b9e312590033bc3214d1b198fc9..1b0a705d9a47bab54d4d1775fb45facc04f956fc 100644 --- a/modules/mol/mm/doc/interaction.rst +++ b/modules/mol/mm/doc/interaction.rst @@ -1,7 +1,7 @@ Interactions ================================================================================ -.. currentmodule:: ost.mol +.. currentmodule:: ost.mol.mm The :class:`Interaction` object is intended to build a basic container that can be used diff --git a/modules/mol/mm/doc/molmm.rst b/modules/mol/mm/doc/molmm.rst index f243b7dfb4869e7123ccb0757479745fc3a85b8b..2de5d956abd2ff0ac0179094be97a2e147ecc02e 100644 --- a/modules/mol/mm/doc/molmm.rst +++ b/modules/mol/mm/doc/molmm.rst @@ -1,7 +1,7 @@ The mm Module ================================================================================ -.. currentmodule:: ost.mol +.. currentmodule:: ost.mol.mm Introduction -------------------------------------------------------------------------------- diff --git a/modules/mol/mm/doc/observers.rst b/modules/mol/mm/doc/observers.rst index 1aecc69fed3ff22253aef5f7a8ddfa8e31df1bbd..265236442ccfc7d9dc7df67458c4075dfc6124ea 100644 --- a/modules/mol/mm/doc/observers.rst +++ b/modules/mol/mm/doc/observers.rst @@ -1,7 +1,7 @@ Observers ================================================================================ -.. currentmodule:: ost.mol +.. currentmodule:: ost.mol.mm Observers can be registered to a :class:`Simulation` and get called at a defined interval. diff --git a/modules/mol/mm/doc/settings.rst b/modules/mol/mm/doc/settings.rst index 648be8bcc11b04995057433bf2644eedc679a94d..d8b3a13b4a713791172284bb79ed797374f7ef70 100644 --- a/modules/mol/mm/doc/settings.rst +++ b/modules/mol/mm/doc/settings.rst @@ -1,7 +1,7 @@ The Molecular Mechanics Settings ================================================================================ -.. currentmodule:: ost.mol +.. currentmodule:: ost.mol.mm The :class:`Settings` define all parameters to control the buildup of a :class:`Topology` in the :class:`TopologyCreator` and the final setup diff --git a/modules/mol/mm/doc/simulation.rst b/modules/mol/mm/doc/simulation.rst index 217c1cc7df013f3b0c427586e62144c1d681e6b9..9d20aac61fa83325437ac4193e793c5e623e8659 100644 --- a/modules/mol/mm/doc/simulation.rst +++ b/modules/mol/mm/doc/simulation.rst @@ -1,7 +1,7 @@ Simulation ================================================================================ -.. currentmodule:: ost.mol +.. currentmodule:: ost.mol.mm The simulation finally connects a :class:`Topology` with an :class:`EntityHandle`. While applying minimization or diff --git a/modules/mol/mm/doc/topology.rst b/modules/mol/mm/doc/topology.rst index 421be2fe9cf5d95ea3cd89352994f5e67bdcbbe6..b616479960ba581b1af8c39dc7210eb784242091 100644 --- a/modules/mol/mm/doc/topology.rst +++ b/modules/mol/mm/doc/topology.rst @@ -1,7 +1,7 @@ Topology ================================================================================ -.. currentmodule:: ost.mol +.. currentmodule:: ost.mol.mm The :class:`Topology` object is an abstract representation of a protein structure or any combination of particles that interact with each other in diff --git a/modules/mol/mm/pymod/CMakeLists.txt b/modules/mol/mm/pymod/CMakeLists.txt index 3156429e1c24cf25b6c7685fd6507254a02e0f8f..1f47d275beb512dc0911c0390a7b737984415998 100644 --- a/modules/mol/mm/pymod/CMakeLists.txt +++ b/modules/mol/mm/pymod/CMakeLists.txt @@ -14,7 +14,8 @@ set(OST_MOL_MM_PYMOD_SOURCES ) set(OST_MOL_MM_PYMOD_MODULES - "__init__.py" + __init__.py + antechamber.py ) if (NOT ENABLE_STATIC) diff --git a/modules/mol/mm/pymod/__init__.py b/modules/mol/mm/pymod/__init__.py index 5c1138cd0354113ea8d1744912a0cc0868202d2b..a4a7c650e1fe7c9bbb0643bdce4a8d4be6e8e6bd 100644 --- a/modules/mol/mm/pymod/__init__.py +++ b/modules/mol/mm/pymod/__init__.py @@ -1,6 +1,26 @@ +#------------------------------------------------------------------------------ +# This file is part of the OpenStructure project <www.openstructure.org> +# +# Copyright (C) 2008-2016 by the OpenStructure authors +# +# This library is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation; either version 3.0 of the License, or (at your option) +# any later version. +# This library is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this library; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +#------------------------------------------------------------------------------ + import os.path from _ost_mol_mm import * -import ost +import antechamber +import ost def LoadAMBERForcefield(): return Forcefield.Load(os.path.join(ost.GetSharedDataPath(),'forcefields','AMBER03.dat')) diff --git a/modules/mol/mm/pymod/antechamber.py b/modules/mol/mm/pymod/antechamber.py new file mode 100644 index 0000000000000000000000000000000000000000..6fc48ce3f38effb6e1a92683e1ec0e31d318c326 --- /dev/null +++ b/modules/mol/mm/pymod/antechamber.py @@ -0,0 +1,515 @@ +#------------------------------------------------------------------------------ +# This file is part of the OpenStructure project <www.openstructure.org> +# +# Copyright (C) 2008-2016 by the OpenStructure authors +# +# This library is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation; either version 3.0 of the License, or (at your option) +# any later version. +# This library is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this library; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +#------------------------------------------------------------------------------ + +# Functions to use Antechamber (from AmberTools) to automatically generate force +# field parameters. Allows the execution of Antechamber and the parsing of files +# generated by it. + +import _ost_mol_mm as mm +import ost +from ost import settings, mol, geom +import os, subprocess, math + +############################################################################### +# helper functions +def _GetInteraction(functype, atoms, params): + """Get an mm.Interaction with the given func-type and params for the given + atoms (name and types extracted from there).""" + interaction = mm.Interaction(functype) + interaction.SetNames([a.name for a in atoms]) + interaction.SetTypes([a.GetStringProp('type') for a in atoms]) + interaction.SetParam(params) + return interaction + +def _MakeComponentBuildingBlock(eh, ff_dict): + """Take EntityHandle eh (from ParseModifiedPDB) and ff_dict (from + ParseAmberForceField) and return BuildingBlock.""" + # converters: length: A -> nm, angles: deg -> rad, energy: kcal -> kJ + dist_scale = 1./10.0 + angle_scale = math.pi/180. + # bond strength in OpenMM needs a factor of 2 compared to Amber + bond_k_scale = 418.4*2. + angle_k_scale = 4.184 + + # get atom typing (dictionary listing all atoms of a certain type) + atype_dict = {} + for a in eh.atoms: + atype = a.GetStringProp('type') + if not atype in atype_dict: + atype_dict[atype] = [a.handle] + else: + atype_dict[atype].append(a.handle) + + # set masses in entity handle (charges and types set in ParseModifiedPDB) + for atype, mass in ff_dict['MASS']: + for a in atype_dict[atype]: a.SetMass(mass) + + # start by adding atoms + bb = mm.BuildingBlock() + for a in eh.atoms: + bb.AddAtom(a.name, a.GetStringProp('type'), a.GetCharge(), a.GetMass()) + + # add bonds: first all bonds from the entity, then force-field params + bl = eh.GetBondList() + for b in bl: + a1 = b.GetFirst() + a2 = b.GetSecond() + bond = mm.Interaction(mm.FuncType.HarmonicBond) + bond.SetNames([a1.name, a2.name]) + bond.SetTypes([a1.GetStringProp('type'), a2.GetStringProp('type')]) + bb.AddBond(bond) + added_bonds = [] + for atype1, atype2, d0, k in ff_dict['BOND']: + for a1 in atype_dict[atype1]: + for a2 in atype_dict[atype2]: + # check connectivity and uniqueness of bond + if not mol.BondExists(a1, a2): continue + if [a1, a2] in added_bonds or [a2, a1] in added_bonds: continue + added_bonds.append([a1,a2]) + params = [d0*dist_scale, k*bond_k_scale] + bond = _GetInteraction(mm.FuncType.HarmonicBond, [a1, a2], params) + bb.AddBond(bond, replace_existing=True) + + # add angles + added_angles = [] + for atype1, atype2, atype3, a0, k in ff_dict['ANGL']: + # a2 is the central atom + for a2 in atype_dict[atype2]: + for a1 in atype_dict[atype1]: + if not mol.BondExists(a1, a2): continue + for a3 in atype_dict[atype3]: + # check connectivity and uniqueness of angle + if not mol.BondExists(a2, a3): continue + if a1 == a3: continue + if [a1, a2, a3] in added_angles or [a3, a2, a1] in added_angles: + continue + added_angles.append([a1, a2, a3]) + angle = _GetInteraction(mm.FuncType.HarmonicAngle, [a1, a2, a3], + [a0*angle_scale, k*angle_k_scale*2]) + bb.AddAngle(angle) + + # add dihedrals + for atype1, atype2, atype3, atype4, idiv, period, phase, k in ff_dict['DIHE']: + # there can be multiple ones for the same set of types! + added_dihedrals = [] + for a1 in atype_dict[atype1]: + for a2 in atype_dict[atype2]: + if not mol.BondExists(a1, a2): continue + for a3 in atype_dict[atype3]: + if not mol.BondExists(a2, a3): continue + if a1 == a3: continue + for a4 in atype_dict[atype4]: + # check connectivity and uniqueness of dihedral (can be mirrored) + if not mol.BondExists(a3, a4): continue + if a2 == a4: continue + if [a1, a2, a3, a4] in added_dihedrals or \ + [a4, a3, a2, a1] in added_dihedrals: continue + added_dihedrals.append([a1, a2, a3, a4]) + dihe = _GetInteraction(mm.FuncType.PeriodicDihedral, [a1, a2, a3, a4], + [period, phase*angle_scale, k*angle_k_scale]) + bb.AddDihedral(dihe) + + # add impropers + added_impropers = [] + for atype1, atype2, atype3, atype4, period, phase, k in ff_dict['IMPR']: + # third atom is the central atom in amber force-field + for ac in atype_dict[atype3]: + for a1 in atype_dict[atype1]: + if not mol.BondExists(ac, a1): continue + for a2 in atype_dict[atype2]: + if not mol.BondExists(ac, a2): continue + if a1 == a2: continue + for a4 in atype_dict[atype4]: + # check connectivity and uniqueness of impr. (same central) + if not mol.BondExists(ac, a4): continue + if a2 == a4 or a1 == a4: continue + if [ac, a1, a2, a4] in added_impropers or \ + [ac, a1, a4, a2] in added_impropers or \ + [ac, a2, a1, a4] in added_impropers or \ + [ac, a2, a4, a1] in added_impropers or \ + [ac, a4, a1, a2] in added_impropers or \ + [ac, a4, a2, a1] in added_impropers: continue + added_impropers.append([ac, a1, a2, a4]) + impr = _GetInteraction(mm.FuncType.PeriodicImproper, [a1, a2, ac, a4], + [period, phase*angle_scale, k*angle_k_scale]) + bb.AddImproper(impr) + return bb + +def _ParseModifiedPDB(filename): + """Read mpdb file produced by antechamber and return tuple of: + - EntityHandle with connectivity, atom types (property 'type') and charges + - Residue name as extracted from the mpdb file + A RuntimeError is raised if the file can contains multiple residues. + """ + eh = mol.CreateEntity() + rname = '' + edi = eh.EditXCS(mol.BUFFERED_EDIT) + chain = edi.InsertChain('A') + bond_list = [] + # get all atoms and bonds from file + with open(filename, 'r') as in_file: + for line in in_file: + # atom or connectivity + # -> fixed column format assumed for both + if line.startswith('ATOM'): + aname = line[12:17].strip() + # extract res. name and ensure uniqueness + if not rname: + rname = line[17:20].strip() + r = edi.AppendResidue(chain, rname) + elif rname != line[17:20].strip(): + raise RuntimeError("More than one residue in file " + filename +\ + ". Cannot parse!") + # extract and store type and charge + charge = float(line[54:66]) + atype = line[78:80].strip() + a = edi.InsertAtom(r, aname, geom.Vec3()) + a.SetStringProp('type', atype) + a.SetCharge(charge) + elif line.startswith('CONECT'): + ai1 = int(line[6:11]) + # max. 4 bond partners... + for i in range(4): + try: + j = 11 + 5*i + ai2 = int(line[j:j+5]) + # only unique bonds added to list + s = set([ai1, ai2]) + if not s in bond_list: bond_list.append(s) + except: + # exception thrown for empty strings or non-integers + # -> skip + continue + # set all bonds in entity + for indices in bond_list: + indices = list(indices) + a1 = r.atoms[indices[0]-1] + a2 = r.atoms[indices[1]-1] + edi.Connect(a1, a2) + # finalize + edi.UpdateICS() + return eh, rname + +def _ParseAmberForceField(filename): + """Read frcmod file produced by parmchk2 and return dictionary with all + entries for masses, bonds, angles, dihedrals, impropers and non-bonded (LJ) + interactions. Stored as key/list-of-value pairs: + - 'MASS': [atype, mass] + - 'BOND': [atype1, atype2, d0, k] + - 'ANGL': [atype1, atype2, atype3, a0, k] + - 'DIHE': [atype1, atype2, atype3, atype4, idiv, period, phase, k/idiv] + - 'IMPR': [atype1, atype2, atype3, atype4, period, phase, k] + - 'NONB': [Rvdw, epsilon] + """ + keywords = ['MASS', 'BOND', 'ANGL', 'DIHE', 'IMPR', 'NONB'] + with open(filename, 'r') as in_file: + ff_dict = {} + for line in in_file: + # look for keywords + keyword = line[:4] + if not keyword in keywords: continue + # loop until empty line found + ff_dict[keyword] = [] + line = in_file.next() + while len(line.strip()) > 0: + # check for warnings + if 'ATTN' in line: + ost.LogWarning('The following line in ' + filename + ' (' + keyword +\ + ' section) needs revision:\n' + line.strip()) + # fixed column format -> extract entries dep. on current keyword + if keyword == 'MASS': + atype = line[0:2].strip() + s = line[2:].split() + mass = float(s[0]) + ff_dict[keyword].append([atype, mass]) + elif keyword == 'BOND': + atype1 = line[:2].strip() + atype2 = line[3:5].strip() + s = line[5:].split() + k = float(s[0]) + d0 = float(s[1]) + ff_dict[keyword].append([atype1, atype2, d0, k]) + elif keyword == 'ANGL': + atype1 = line[:2].strip() + atype2 = line[3:5].strip() + atype3 = line[6:8].strip() + s = line[8:].split() + k = float(s[0]) + a0 = float(s[1]) + ff_dict[keyword].append([atype1, atype2, atype3, a0, k]) + elif keyword == 'DIHE': + atype1 = line[:2].strip() + atype2 = line[3:5].strip() + atype3 = line[6:8].strip() + atype4 = line[9:11].strip() + s = line[11:].split() + idiv = float(s[0]) + k = float(s[1]) + phase = float(s[2]) + # negative periods = there is more than one term for that dihedral + # -> no need to do anything special about that here... + period = abs(float(s[3])) + ff_dict[keyword].append([atype1, atype2, atype3, atype4, idiv, + period, phase, k/float(idiv)]) + elif keyword == 'IMPR': + atype1 = line[:2].strip() + atype2 = line[3:5].strip() + atype3 = line[6:8].strip() + atype4 = line[9:11].strip() + s = line[11:].split() + k = float(s[0]) + phase = float(s[1]) + period = float(s[2]) + ff_dict[keyword].append([atype1, atype2, atype3, atype4, period, + phase, k]) + elif keyword == 'NONB': + line = line.strip() + atype = line[0:2].strip() + s = line[2:].split() + Rvdw = float(s[0]) + epsilon = float(s[1]) + ff_dict[keyword].append([atype, Rvdw, epsilon]) + # next... + line = in_file.next() + return ff_dict +############################################################################### + +def RunAntechamber(res_name, filename, format='ccif', amberhome=None, + base_out_dir=None): + """Run Antechamber to guess force field parameters for a given residue name. + + This requires an installation of AmberTools (tested with AmberTools15) with + binaries ``antechamber`` and ``parmchk2``. + + This has the same restrictions as Antechamber itself and we assume the input + to be uncharged. Note that Antechamber cannot deal with metal ions and other + non-organic elements. + + The results are stored in a separate folder named `res_name` within + `base_out_dir` (if given, otherwise the current working directory). The main + output files are ``frcmod`` and ``out.mpdb``. The former contains force field + parameters and masses. The latter maps atom names to atom types and defines + the partial charges. The same output could be obtained as follows: + + .. code-block:: console + + $ antechamber -i <FILENAME> -fi <FORMAT> -bk '<RES_NAME>' -o out.mol2 -fo mol2 -c bcc -pf yes + $ parmchk2 -i out.mol2 -f mol2 -o frcmod -a Y + $ antechamber -i out.mol2 -fi mol2 -o out.mpdb -fo mpdb -pf yes + + The force field parameters can be manually modified if needed. It can for + instance happen that some parameters cannot be identified. Those lines will + be marked with a comment "ATTN, need revision". + + :param res_name: Residue name for which we desire force field parameters. + :type res_name: :class:`str` + :param filename: Path to a file which contains the necessary information for + `res_name`. It must include all hydrogens. + :type filename: :class:`str` + :param format: Format of file given with `filename`. Common formats are 'ccif' + for PDB's component dictionary or 'pdb' for a PDB file + containing the desired residue with all hydrogens. + :type format: :class:`str` + :param amberhome: Base path of your AmberTools installation. If not None, + we look for ``antechamber`` and ``parmchk2`` within + ``AMBERHOME/bin`` additionally to the system's ``PATH``. + :type amberhome: :class:`str` + :param base_out_dir: Path to a base path, where the output will be stored. + If None, the current working directory is used. + :type base_out_dir: :class:`str` + """ + # find antechamber binaries + if amberhome is None: + search_paths = [] + else: + search_paths = [os.path.join(amberhome, 'bin')] + try: + antechamber = settings.Locate('antechamber', search_paths=search_paths) + parmchk2 = settings.Locate('parmchk2', search_paths=search_paths) + except settings.FileNotFound as ex: + ost.LogError("Failed to find Antechamber binaries. Make sure you have " + "AmberTools installed!") + raise ex + + # prepare path + cwd = os.getcwd() + if base_out_dir is None: + base_out_dir = cwd + out_dir = os.path.abspath(os.path.join(base_out_dir, res_name)) + if not os.path.exists(out_dir): + # note: this creates intermediate folders too + try: + os.makedirs(out_dir) + except Exception as ex: + ost.LogError("Failed to create output directory " + out_dir + "!") + raise ex + + # execute it + os.chdir(out_dir) + try: + cmds = [antechamber + " -i " + filename + " -fi " + format + " -bk " \ + + res_name + " -o out.mol2 -fo mol2 -c bcc -pf yes", + parmchk2 + " -i out.mol2 -f mol2 -o frcmod -a Y", + antechamber + " -i out.mol2 -fi mol2 -o out.mpdb -fo mpdb -pf yes"] + all_sout = "Generating force field parameters for " + res_name + "\n" + all_serr = "" + for cmd in cmds: + all_sout += "-"*70 + "\n" + "Stdout of: " + cmd + "\n" + "-"*70 + "\n" + all_serr += "-"*70 + "\n" + "Stderr of: " + cmd + "\n" + "-"*70 + "\n" + job = subprocess.Popen(cmd.split(" "), stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + sout, serr = job.communicate() + all_sout += sout + all_serr += serr + if job.returncode != 0: + ost.LogError("Unsuccessful execution of " + cmd + ". Return code: "\ + + str(job.returncode)) + # write command line outputs + with open("console.stdout", "w") as txt_file: + txt_file.write(all_sout) + with open("console.stderr", "w") as txt_file: + txt_file.write(all_serr) + except Exception as ex: + ost.LogError("Failed to excecute antechamber binaries!") + raise ex + + # get back to original path + os.chdir(cwd) + + # check result + frcmod_filename = os.path.join(out_dir, 'frcmod') + mpdb_filename = os.path.join(out_dir, 'out.mpdb') + if not os.path.exists(frcmod_filename): + raise RuntimeError("Failed to generate frcmod file with Antechamber!") + if not os.path.exists(mpdb_filename): + raise RuntimeError("Failed to generate out.mpdb file with Antechamber!") + +def AddFromFiles(force_field, frcmod_filename, mpdb_filename): + """Add data from a frcmod and an mpdb file to a force field. + + This will add a new :class:`~ost.mol.mm.BuildingBlock` to `force_field` for + the residue defined in those files (residue name is extracted from the mpdb + file which can only contain a single residue). Charges for each atom are + extracted from the mpdb file. According to the frcmod file, an + :class:`~ost.mol.mm.Interaction` is added for each bond, angle, dihedral and + improper. Atom types with masses and non-bonded interactions are added to + `force_field` as needed. + + :param force_field: A force field object to which the new parameters are + added. + :type force_field: :class:`~ost.mol.mm.Forcefield` + :param frcmod_filename: Path to ``frcmod`` file as generated by ``parmchk2``. + :type frcmod_filename: :class:`str` + :param mpdb_filename: Path to mpdb file as generated by ``antechamber``. + :type mpdb_filename: :class:`str` + :return: The updated force field (same as `force_field`). + :rtype: :class:`~ost.mol.mm.Forcefield` + """ + # check files + if not os.path.exists(frcmod_filename): + raise RuntimeError("Could not find frcmod file: " + frcmod_filename) + if not os.path.exists(mpdb_filename): + raise RuntimeError("Could not find mpdb file: " + mpdb_filename) + # read in files + try: + eh, res_name = _ParseModifiedPDB(mpdb_filename) + except Exception as ex: + ost.LogError("Failed to parse mpdb file: " + mpdb_filename) + raise ex + try: + ff_dict = _ParseAmberForceField(frcmod_filename) + except Exception as ex: + ost.LogError("Failed to parse frcmod file: " + frcmod_filename) + raise ex + ost.LogInfo("Adding force field for " + res_name) + # add atoms to force field + for aname, mass in ff_dict['MASS']: + force_field.AddMass(aname, mass) + # add LJs + lj_sigma_scale = 2./10./2**(1./6.) # Rvdw to sigma in nm + lj_epsilon_scale = 4.184 # kcal to kJ + for aname, Rvdw, epsilon in ff_dict['NONB']: + # fix 0,0 (from OpenMM's processAmberForceField.py) + if Rvdw == 0 or epsilon == 0: + Rvdw, epsilon = 1, 0 + lj = mm.Interaction(mm.FuncType.LJ) + lj.SetTypes([aname]) + lj.SetParam([Rvdw*lj_sigma_scale, epsilon*lj_epsilon_scale]) + force_field.AddLJ(lj) + # add building block + bb = _MakeComponentBuildingBlock(eh, ff_dict) + force_field.AddBuildingBlock(res_name, bb) + + return force_field + +def AddFromPath(force_field, out_dir): + """Add data from a directory created with :meth:`Run` to a force field. + See :meth:`AddFromFiles` for details. + + :param force_field: A force field object to which the new parameters are + added. + :type force_field: :class:`~ost.mol.mm.Forcefield` + :param out_dir: Output directory as created with :meth:`Run`. Must contain + files ``frcmod`` and ``out.mpdb``. + :type out_dir: :class:`str` + :return: The updated force field (same as `force_field`). + :rtype: :class:`~ost.mol.mm.Forcefield` + """ + frcmod_filename = os.path.join(out_dir, 'frcmod') + mpdb_filename = os.path.join(out_dir, 'out.mpdb') + return AddFromFiles(force_field, frcmod_filename, mpdb_filename) + +def AddIon(force_field, res_name, atom_name, atom_mass, atom_charge, lj_sigma, + lj_epsilon): + """Add a single atom as an ion to a force field. + + Since Antechamber cannot deal with ions, you can add simple ones easily with + this function. This adds a :class:`~ost.mol.mm.BuildingBlock` to `force_field` + for the given residue name containing a single atom. The atom will have a type + with the same name as the atom name and the given mass, charge and non-bonded + (LJ) interaction parameters. + + :param force_field: A force field object to which the ion is added. + :type force_field: :class:`~ost.mol.mm.Forcefield` + :param res_name: Residue name for the ion to be added. + :type res_name: :class:`str` + :param atom_name: Atom name which is also used as atom type name. + :type atom_name: :class:`str` + :param atom_mass: Mass of the atom. + :type atom_mass: :class:`float` + :param atom_charge: Charge of the atom. + :type atom_charge: :class:`float` + :param lj_sigma: The sigma parameter for the non-bonded LJ interaction. + :type lj_sigma: :class:`float` in nm + :param lj_epsilon: The sigma parameter for the non-bonded LJ interaction. + :type lj_epsilon: :class:`float` in kJ/mol + """ + # add mass (atom_type = atom_name) + force_field.AddMass(atom_name, atom_mass) + # add building block + bb = mm.BuildingBlock() + bb.AddAtom(atom_name, atom_name, atom_charge, atom_mass) + force_field.AddBuildingBlock(res_name, bb) + # add dummy LJ + lj = mm.Interaction(mm.FuncType.LJ) + lj.SetTypes([atom_name]) + lj.SetParam([lj_sigma, lj_epsilon]) + force_field.AddLJ(lj) + +__all__ = ('RunAntechamber', 'AddFromFiles', 'AddFromPath', 'AddIon',) diff --git a/modules/mol/mm/tests/CMakeLists.txt b/modules/mol/mm/tests/CMakeLists.txt index 99f2c4575dbc77849567d5d9e08a0497dfc9118d..b3eb4d801cb1e650fa6b89b718a5786f84e19e65 100644 --- a/modules/mol/mm/tests/CMakeLists.txt +++ b/modules/mol/mm/tests/CMakeLists.txt @@ -6,6 +6,7 @@ set(OST_MOL_MM_UNIT_TESTS test_forcefield.cc test_simulation.cc tests.cc + test_antechamber.py ) ost_unittest(MODULE mol_mm SOURCES "${OST_MOL_MM_UNIT_TESTS}") diff --git a/modules/mol/mm/tests/TYR/ff_AA.dat b/modules/mol/mm/tests/TYR/ff_AA.dat new file mode 100644 index 0000000000000000000000000000000000000000..34dc63afee16df9ae3e00bd21c39c47ccc9f0bd7 Binary files /dev/null and b/modules/mol/mm/tests/TYR/ff_AA.dat differ diff --git a/modules/mol/mm/tests/TYR/frcmod b/modules/mol/mm/tests/TYR/frcmod new file mode 100644 index 0000000000000000000000000000000000000000..956433fb0d20f94e566d639a0ce392ded96aae29 --- /dev/null +++ b/modules/mol/mm/tests/TYR/frcmod @@ -0,0 +1,104 @@ +Remark line goes here +MASS +n3 14.010 0.530 +c3 12.010 0.878 +c 12.010 0.616 +o 16.000 0.434 +ca 12.010 0.360 +oh 16.000 0.465 +hn 1.008 0.161 +h1 1.008 0.135 +hc 1.008 0.135 +ha 1.008 0.135 +ho 1.008 0.135 + +BOND +c3-n3 320.60 1.470 +hn-n3 394.10 1.018 +c -c3 328.30 1.508 +c3-c3 303.10 1.535 +c3-h1 335.90 1.093 +c -o 648.00 1.214 +c -oh 466.40 1.306 +c3-ca 323.50 1.513 +c3-hc 337.30 1.092 +ca-ca 478.40 1.387 +ca-ha 344.30 1.087 +ca-oh 386.10 1.362 +ho-oh 369.60 0.974 + +ANGLE +c -c3-n3 66.590 111.140 +c3-c3-n3 66.180 110.380 +h1-c3-n3 49.390 109.920 +c3-n3-hn 47.130 109.920 +c3-c -o 68.030 123.110 +c3-c -oh 69.840 112.200 +c3-c3-ca 63.250 112.090 +c3-c3-hc 46.370 110.050 +c -c3-c3 63.790 110.530 +c -c3-h1 47.630 107.660 +c -oh-ho 51.190 107.370 +o -c -oh 77.380 122.880 +c3-c3-h1 46.360 110.070 +c3-ca-ca 63.840 120.630 +ca-c3-hc 46.960 110.150 +ca-ca-ca 67.180 119.970 +ca-ca-ha 48.460 120.010 +ca-ca-oh 69.850 119.940 +ca-oh-ho 48.850 109.470 +hn-n3-hn 41.300 107.130 +hc-c3-hc 39.430 108.350 + +DIHE +o -c -c3-n3 6 0.000 180.000 2.000 +oh-c -c3-n3 6 0.000 180.000 2.000 +ca-c3-c3-n3 9 1.400 0.000 3.000 +hc-c3-c3-n3 9 1.400 0.000 3.000 +c3-c -oh-ho 2 4.600 180.000 2.000 +c3-c3-ca-ca 6 0.000 0.000 2.000 +c -c3-n3-hn 6 1.800 0.000 3.000 +c -c3-c3-ca 9 1.400 0.000 3.000 +c -c3-c3-hc 9 1.400 0.000 3.000 +o -c -c3-c3 6 0.000 180.000 2.000 +o -c -c3-h1 1 0.800 0.000 -1.000 +o -c -c3-h1 1 0.080 180.000 3.000 +o -c -oh-ho 1 2.300 180.000 -2.000 +o -c -oh-ho 1 1.900 0.000 1.000 +c3-c3-n3-hn 6 1.800 0.000 3.000 +oh-c -c3-c3 6 0.000 180.000 2.000 +c3-ca-ca-ca 4 14.500 180.000 2.000 +c3-ca-ca-ha 4 14.500 180.000 2.000 +ca-c3-c3-h1 9 1.400 0.000 3.000 +ca-ca-ca-ca 4 14.500 180.000 2.000 +ca-ca-ca-ha 4 14.500 180.000 2.000 +hc-c3-ca-ca 6 0.000 0.000 2.000 +ca-ca-ca-oh 4 14.500 180.000 2.000 +ca-ca-oh-ho 2 1.800 180.000 2.000 +ha-ca-ca-oh 4 14.500 180.000 2.000 +oh-c -c3-h1 6 0.000 180.000 2.000 +h1-c3-n3-hn 6 1.800 0.000 3.000 +h1-c3-c3-hc 9 1.400 0.000 3.000 +ha-ca-ca-ha 4 14.500 180.000 2.000 + +IMPROPER +c3-o -c -oh 1.1 180.0 2.0 +c3-ca-ca-ca 1.1 180.0 2.0 +ca-ca-ca-ha 1.1 180.0 2.0 Using general improper torsional angle X- X-ca-ha, penalty score= 6.0) +ca-ca-ca-oh 1.1 180.0 2.0 Using the default value + +NONBON + n3 1.8240 0.1700 + c3 1.9080 0.1094 + c 1.9080 0.0860 + o 1.6612 0.2100 + ca 1.9080 0.0860 + oh 1.7210 0.2104 + hn 0.6000 0.0157 + h1 1.3870 0.0157 + hc 1.4870 0.0157 + ha 1.4590 0.0150 + ho 0.0000 0.0000 + + + diff --git a/modules/mol/mm/tests/TYR/out.mpdb b/modules/mol/mm/tests/TYR/out.mpdb new file mode 100644 index 0000000000000000000000000000000000000000..ef3dca1510bc20cd082af1607caa0750f1b7e8a6 --- /dev/null +++ b/modules/mol/mm/tests/TYR/out.mpdb @@ -0,0 +1,48 @@ +ATOM 1 N TYR 1 1.320 0.952 1.428 -0.899800 1.55 n3 +ATOM 2 CA TYR 1 -0.018 0.429 1.734 0.132500 1.70 c3 +ATOM 3 C TYR 1 -0.103 0.094 3.201 0.637100 1.70 c +ATOM 4 O TYR 1 0.886 -0.254 3.799 -0.548000 1.52 o +ATOM 5 CB TYR 1 -0.274 -0.831 0.907 -0.071100 1.70 c3 +ATOM 6 CG TYR 1 -0.189 -0.496 -0.559 -0.128300 1.70 ca +ATOM 7 CD1 TYR 1 1.022 -0.589 -1.219 -0.090000 1.70 ca +ATOM 8 CD2 TYR 1 -1.324 -0.102 -1.244 -0.090000 1.70 ca +ATOM 9 CE1 TYR 1 1.103 -0.282 -2.563 -0.182000 1.70 ca +ATOM 10 CE2 TYR 1 -1.247 0.210 -2.587 -0.182000 1.70 ca +ATOM 11 CZ TYR 1 -0.032 0.118 -3.252 0.129100 1.70 ca +ATOM 12 OH TYR 1 0.044 0.420 -4.574 -0.494100 1.52 oh +ATOM 13 OXT TYR 1 -1.279 0.184 3.842 -0.602100 1.52 oh +ATOM 14 H TYR 1 1.977 0.225 1.669 0.365800 1.20 hn +ATOM 15 H2 TYR 1 1.365 1.063 0.426 0.365800 1.20 hn +ATOM 16 HA TYR 1 -0.767 1.183 1.489 0.097700 1.20 h1 +ATOM 17 HB2 TYR 1 0.473 -1.585 1.152 0.064700 1.20 hc +ATOM 18 HB3 TYR 1 -1.268 -1.219 1.134 0.064700 1.20 hc +ATOM 19 HD1 TYR 1 1.905 -0.902 -0.683 0.136000 1.20 ha +ATOM 20 HD2 TYR 1 -2.269 -0.031 -0.727 0.136000 1.20 ha +ATOM 21 HE1 TYR 1 2.049 -0.354 -3.078 0.145500 1.20 ha +ATOM 22 HE2 TYR 1 -2.132 0.523 -3.121 0.145500 1.20 ha +ATOM 23 HH TYR 1 -0.123 -0.399 -5.059 0.421000 1.20 ho +ATOM 24 HXT TYR 1 -1.333 -0.030 4.784 0.446000 1.20 ho +CONECT 1 2 14 15 +CONECT 2 1 3 5 16 +CONECT 3 2 4 13 +CONECT 4 3 +CONECT 5 2 6 17 18 +CONECT 6 5 7 8 +CONECT 7 6 9 19 +CONECT 8 6 10 20 +CONECT 9 7 11 21 +CONECT 10 8 11 22 +CONECT 11 9 10 12 +CONECT 12 11 23 +CONECT 13 3 24 +CONECT 14 1 +CONECT 15 1 +CONECT 16 2 +CONECT 17 5 +CONECT 18 5 +CONECT 19 7 +CONECT 20 8 +CONECT 21 9 +CONECT 22 10 +CONECT 23 12 +CONECT 24 13 diff --git a/modules/mol/mm/tests/test_antechamber.py b/modules/mol/mm/tests/test_antechamber.py new file mode 100644 index 0000000000000000000000000000000000000000..27d0828fab8e53a7eb6b9e6d7c024c9f2099ffd8 --- /dev/null +++ b/modules/mol/mm/tests/test_antechamber.py @@ -0,0 +1,101 @@ +'''Unit tests for the Antechamber submodule.''' + +import unittest +import os, sys + +from ost.mol import mm + +class TestAntechamber(unittest.TestCase): + ########################################################################### + # HELPERS + def _CompareInteractions(self, int_new, int_ref): + self.assertEqual(int_new.IsParametrized(), int_ref.IsParametrized()) + if int_new.IsParametrized(): + self.assertEqual(int_new.GetNames(), int_ref.GetNames()) + self.assertEqual(int_new.GetTypes(), int_ref.GetTypes()) + self.assertEqual(int_new.GetFuncType(), int_ref.GetFuncType()) + params_new = int_new.GetParam() + params_ref = int_ref.GetParam() + self.assertEqual(len(params_new), len(params_ref)) + for p_new, p_ref in zip(params_new, params_ref): + self.assertAlmostEqual(p_new, p_ref) + + def _CompareInteractionLists(self, int_list_new, int_list_ref): + self.assertEqual(len(int_list_new), len(int_list_ref)) + for int_new, int_ref in zip(int_list_new, int_list_ref): + self._CompareInteractions(int_new, int_ref) + + def _CompareBuildingBlocks(self, ff_new, ff_ref, res_name): + # get BuildingBlocks + bb_new = ff_new.GetBuildingBlock(res_name) + bb_ref = ff_ref.GetBuildingBlock(res_name) + # get atoms and LJs + self.assertEqual(len(bb_new.GetAtoms()), len(bb_ref.GetAtoms())) + for aname, aname_ref in zip(bb_new.GetAtoms(), bb_ref.GetAtoms()): + self.assertEqual(aname, aname_ref) + atype = bb_new.GetType(aname) + self.assertEqual(atype, bb_ref.GetType(aname)) + self.assertEqual(bb_new.GetMass(aname), bb_ref.GetMass(aname)) + self.assertEqual(ff_new.GetMass(atype), ff_ref.GetMass(atype)) + self.assertEqual(bb_new.GetCharge(aname), bb_ref.GetCharge(aname)) + self._CompareInteractions(ff_new.GetLJ(atype), ff_ref.GetLJ(atype)) + # compare all types of interactions + self._CompareInteractionLists(bb_new.GetBonds(), bb_ref.GetBonds()) + self._CompareInteractionLists(bb_new.GetAngles(), bb_ref.GetAngles()) + self._CompareInteractionLists(bb_new.GetDihedrals(), + bb_ref.GetDihedrals()) + self._CompareInteractionLists(bb_new.GetImpropers(), + bb_ref.GetImpropers()) + # those below are expected to be 0 btw + self._CompareInteractionLists(bb_new.GetCMaps(), bb_ref.GetCMaps()) + self._CompareInteractionLists(bb_new.GetConstraints(), + bb_ref.GetConstraints()) + self._CompareInteractionLists(bb_new.GetExclusions(), + bb_ref.GetExclusions()) + ########################################################################### + + def testParseFromFiles(self): + # get new ff and compare with ref. + ff_ref = mm.Forcefield.Load('TYR/ff_AA.dat') + # self check + self._CompareBuildingBlocks(ff_ref, ff_ref, 'TYR') + # check parsed files + ff = mm.Forcefield() + mm.antechamber.AddFromFiles(ff, 'TYR/frcmod', 'TYR/out.mpdb') + self._CompareBuildingBlocks(ff, ff_ref, 'TYR') + + def testParseFromPath(self): + # check parsed path + ff = mm.Forcefield() + mm.antechamber.AddFromPath(ff, 'TYR') + ff_ref = mm.Forcefield.Load('TYR/ff_AA.dat') + self._CompareBuildingBlocks(ff, ff_ref, 'TYR') + + def testAddIon(self): + # add pseudo iron and see + ff = mm.Forcefield() + mm.antechamber.AddIon(ff, 'FE2', 'FE', 55, 2, 1, 0) + bb = ff.GetBuildingBlock('FE2') + # check block + self.assertEqual(bb.GetAtoms(), ['FE']) + self.assertEqual(bb.GetTypes(), ['FE']) + self.assertEqual(bb.GetMasses(), [55]) + self.assertEqual(bb.GetCharges(), [2]) + # nothing else... + self.assertEqual(len(bb.GetAngles()), 0) + self.assertEqual(len(bb.GetCMaps()), 0) + self.assertEqual(len(bb.GetConstraints()), 0) + self.assertEqual(len(bb.GetImpropers()), 0) + self.assertEqual(len(bb.GetDihedrals()), 0) + self.assertEqual(len(bb.GetBonds()), 0) + self.assertEqual(len(bb.GetExclusions()), 0) + # check force field + lj = ff.GetLJ('FE') + self.assertTrue(lj.IsParametrized()) + self.assertEqual(lj.GetTypes(), ['FE']) + self.assertEqual(lj.GetFuncType(), mm.FuncType.LJ) + self.assertEqual(lj.GetParam(), [1, 0]) + +if __name__ == "__main__": + from ost import testutils + testutils.RunTests() diff --git a/modules/seq/alg/doc/seqalg.rst b/modules/seq/alg/doc/seqalg.rst index 2137e5ab17b5d3cd1ba68715c6b050f4790c0f32..0285d04c75553e73f667a17860e4b01b743a5b75 100644 --- a/modules/seq/alg/doc/seqalg.rst +++ b/modules/seq/alg/doc/seqalg.rst @@ -4,6 +4,8 @@ .. module:: ost.seq.alg :synopsis: Algorithms for sequences +Algorithms for Alignments +-------------------------------------------------------------------------------- .. function:: MergePairwiseAlignments(pairwise_alns, ref_seq) @@ -161,6 +163,7 @@ :param gap_ext: The gap extension penalty. Must be a negative number :returns: best-scoring alignment of *seq1* and *seq2*. +.. autofunction:: ost.seq.alg.renumber.Renumber .. _contact-prediction: @@ -215,8 +218,10 @@ corresponding to interacting residues. .. autofunction:: PredictContacts -.. function:: CalculateMutualInformation(aln,weights=LoadConstantContactWeightMatrix(), - apc_correction=true,zpx_transformation=true,small_number_correction=0.05) +.. function:: CalculateMutualInformation(aln, \ + weights=LoadConstantContactWeightMatrix(), \ + apc_correction=true, zpx_transformation=true, \ + small_number_correction=0.05) Calculates the mutual information (MI) from a multiple sequence alignemnt. Contributions of each pair of amino-acids are weighted using the matrix **weights** (weighted mutual information). The average product correction (**apc_correction**) correction and transformation into Z-scores (**zpx_transofrmation**) increase prediciton accuracy by reducing the effect of phylogeny and other noise sources. The small number correction reduces noise for alignments with small number of sequences of low diversity. @@ -231,7 +236,8 @@ corresponding to interacting residues. :param small_number_correction: initial values for the probabilities of having a given pair of amino acids *p(a,b)*. :type small_number_correction: :class:`float` -.. function:: CalculateContactScore(aln,weights=LoadDefaultContactWeightMatrix()) +.. function:: CalculateContactScore(aln, \ + weights=LoadDefaultContactWeightMatrix()) Calculates the Contact Score (*CoSc*) from a multiple sequence alignment. For each pair of residues *(i,j)* (pair of columns in the MSA), *CoSc(i,j)* is the average over the values of the **weights** corresponding to the amino acid pairs in the columns. @@ -240,8 +246,8 @@ corresponding to interacting residues. :param weights: The contact weight matrix :type weights: :class`ContactWeightMatrix` -.. function:: CalculateContactSubstitutionScore(aln,ref_seq_index=0, - weights=LoadDefaultPairSubstWeightMatrix()) +.. function:: CalculateContactSubstitutionScore(aln, ref_seq_index=0, \ + weights=LoadDefaultPairSubstWeightMatrix()) Calculates the Contact Substitution Score (*CoEvoSc*) from a multiple sequence alignment. For each pair of residues *(i,j)* (pair of columns in the MSA), *CoEvoSc(i,j)* is the average over the values of the **weights** corresponding to substituting the amino acid pair in the reference sequence (given by **ref_seq_index**) with all other pairs in columns *(i,j)* of the **aln**. @@ -290,3 +296,288 @@ corresponding to interacting residues. A :class:`CharList` of one letter codes of the amino acids for which weights are found in the **weights** matrix. +Get and analyze distance matrices from alignments +-------------------------------------------------------------------------------- + +Given a multiple sequence alignment between a reference sequence (first sequence +in alignment) and a list of structures (remaining sequences in alignment with an +attached view to the structure), this set of functions can be used to analyze +differences between the structures. + +**Example:** + +.. code-block:: python + + # SETUP: aln is multiple sequence alignment, where first sequence is the + # reference sequence and all others have a structure attached + + # clip alignment to only have parts with at least 3 sequences (incl. ref.) + # -> aln will be cut and clip_start is 1st column of aln that was kept + clip_start = seq.alg.ClipAlignment(aln, 3) + + # get variance measure and distance to mean for each residue pair + d_map = seq.alg.CreateDistanceMap(aln) + var_map = seq.alg.CreateVarianceMap(d_map) + dist_to_mean = seq.alg.CreateDist2Mean(d_map) + + # report min. and max. variances + print "MIN-MAX:", var_map.Min(), "-", var_map.Max() + # get data and json-strings for further processing + var_map_data = var_map.GetData() + var_map_json = var_map.GetJsonString() + dist_to_mean_data = dist_to_mean.GetData() + dist_to_mean_json = dist_to_mean.GetJsonString() + +.. function:: ClipAlignment(aln, n_seq_thresh=2, set_offset=true, \ + remove_empty=true) + + Clips alignment so that first and last column have at least the desired number + of structures. + + :param aln: Multiple sequence alignment. Will be cut! + :type aln: :class:`~ost.seq.AlignmentHandle` + :param n_seq_thresh: Minimal number of sequences desired. + :type n_seq_thresh: :class:`int` + :param set_offset: Shall we update offsets for attached views? + :type set_offset: :class:`bool` + :param remove_empty: Shall we remove sequences with only gaps in cut aln? + :type remove_empty: :class:`bool` + :returns: Starting column (0-indexed), where cut region starts (w.r.t. + original aln). -1, if there is no region in the alignment with + at least the desired number of structures. + :rtype: :class:`int` + +.. function:: CreateDistanceMap(aln) + + Create distance map from a multiple sequence alignment. + + The algorithm requires that the sequence alignment consists of at least two + sequences. The sequence at index 0 serves as a frame of reference. All the + other sequences must have an attached view and a properly set sequence offset + (see :meth:`~ost.seq.AlignmentHandle.SetSequenceOffset`). + + For each of the attached views, the C-alpha distance pairs are extracted and + mapped onto the corresponding C-alpha distances in the reference sequence. + + :param aln: Multiple sequence alignment. + :type aln: :class:`~ost.seq.AlignmentHandle` + :returns: Distance map. + :rtype: :class:`DistanceMap` + :raises: Exception if *aln* has less than 2 sequences or any sequence (apart + from index 0) is lacking an attached view. + +.. function:: CreateVarianceMap(d_map, sigma=25) + + :returns: Variance measure for each entry in *d_map*. + :rtype: :class:`VarianceMap` + :param d_map: Distance map as created with :func:`CreateDistanceMap`. + :type d_map: :class:`DistanceMap` + :param sigma: Used for weighting of variance measure + (see :meth:`Distances.GetWeightedStdDev`) + :type sigma: :class:`float` + :raises: Exception if *d_map* has no entries. + +.. function:: CreateDist2Mean(d_map) + + :returns: Distances to mean for each structure in *d_map*. + Structures are in the same order as passed when creating *d_map*. + :rtype: :class:`Dist2Mean` + :param d_map: Distance map as created with :func:`CreateDistanceMap`. + :type d_map: :class:`DistanceMap` + :raises: Exception if *d_map* has no entries. + +.. class:: Distances + + Container used by :class:`DistanceMap` to store a pair wise distance for each + structure. Each structure is identified by its index in the originally used + alignment (see :func:`CreateDistanceMap`). + + .. method:: GetDataSize() + + :returns: Number of pairwise distances. + :rtype: :class:`int` + + .. method:: GetAverage() + + :returns: Average of all distances. + :rtype: :class:`float` + :raises: Exception if there are no distances. + + .. method:: GetMin() + GetMax() + + :returns: Minimal/maximal distance. + :rtype: :class:`tuple` (distance (:class:`float`), index (:class:`int`)) + :raises: Exception if there are no distances. + + .. method:: GetDataElement(index) + + :returns: Element at given *index*. + :rtype: :class:`tuple` (distance (:class:`float`), index (:class:`int`)) + :param index: Index within list of distances (must be < :meth:`GetDataSize`). + :type index: :class:`int` + :raises: Exception if there are no distances or *index* out of bounds. + + .. method:: GetStdDev() + + :returns: Standard deviation of all distances. + :rtype: :class:`float` + :raises: Exception if there are no distances. + + .. method:: GetWeightedStdDev(sigma) + + :returns: Standard deviation of all distances multiplied by + exp( :meth:`GetAverage` / (-2*sigma) ). + :rtype: :class:`float` + :param sigma: Defines weight. + :type sigma: :class:`float` + :raises: Exception if there are no distances. + + .. method:: GetNormStdDev() + + :returns: Standard deviation of all distances divided by :meth:`GetAverage`. + :rtype: :class:`float` + :raises: Exception if there are no distances. + +.. class:: DistanceMap + + Container returned by :func:`CreateDistanceMap`. + Essentially a symmetric :meth:`GetSize` x :meth:`GetSize` matrix containing + up to :meth:`GetNumStructures` distances (list stored as :class:`Distances`). + Indexing of residues starts at 0 and corresponds to the positions in the + originally used alignment (see :func:`CreateDistanceMap`). + + .. method:: GetDistances(i_res1, i_res2) + + :returns: List of distances for given pair of residue indices. + :rtype: :class:`Distances` + :param i_res1: Index of residue. + :type i_res1: :class:`int` + :param i_res2: Index of residue. + :type i_res2: :class:`int` + + .. method:: GetSize() + + :returns: Number of residues in map. + :rtype: :class:`int` + + .. method:: GetNumStructures() + + :returns: Number of structures originally used when creating the map + (see :func:`CreateDistanceMap`). + :rtype: :class:`int` + +.. class:: VarianceMap + + Container returned by :func:`CreateVarianceMap`. + Like :class:`DistanceMap`, it is a symmetric :meth:`GetSize` x :meth:`GetSize` + matrix containing variance measures. + Indexing of residues is as in :class:`DistanceMap`. + + .. method:: Get(i_res1, i_res2) + + :returns: Variance measure for given pair of residue indices. + :rtype: :class:`float` + :param i_res1: Index of residue. + :type i_res1: :class:`int` + :param i_res2: Index of residue. + :type i_res2: :class:`int` + + .. method:: GetSize() + + :returns: Number of residues in map. + :rtype: :class:`int` + + .. method:: Min() + Max() + + :returns: Minimal/maximal variance in the map. + :rtype: :class:`float` + + .. method:: ExportDat(file_name) + ExportCsv(file_name) + ExportJson(file_name) + + Write all variance measures into a file. The possible formats are: + + - "dat" file: a list of "*i_res1+1* *i_res2+1* variance" lines + - "csv" file: a list of ";" separated variances (one line for each *i_res1*) + - "json" file: a JSON formatted file (see :meth:`GetJsonString`) + + :param file_name: Path to file to be created. + :type file_name: :class:`str` + :raises: Exception if the file cannot be opened for writing. + + .. method:: GetJsonString() + + :returns: A JSON formatted list of :meth:`GetSize` lists with + :meth:`GetSize` variances + :rtype: :class:`str` + + .. method:: GetData() + + Gets all the data in this map at once. Note that this is much faster (10x + speedup observed) than parsing :meth:`GetJsonString` or using :meth:`Get` + on each element. + + :returns: A list of :meth:`GetSize` lists with :meth:`GetSize` variances. + :rtype: :class:`list` of :class:`list` of :class:`float` + +.. class:: Dist2Mean + + Container returned by :func:`CreateDist2Mean`. + Stores distances to mean for :meth:`GetNumResidues` residues of + :meth:`GetNumStructures` structures. + Indexing of residues is as in :class:`DistanceMap`. + Indexing of structures goes from 0 to :meth:`GetNumStructures` - 1 and is in + the same order as the structures in the originally used alignment. + + .. method:: Get(i_res, i_str) + + :returns: Distance to mean for given residue and structure indices. + :rtype: :class:`float` + :param i_res: Index of residue. + :type i_res: :class:`int` + :param i_str: Index of structure. + :type i_str: :class:`int` + + .. method:: GetNumResidues() + + :returns: Number of residues. + :rtype: :class:`int` + + .. method:: GetNumStructures() + + :returns: Number of structures. + :rtype: :class:`int` + + .. method:: ExportDat(file_name) + ExportCsv(file_name) + ExportJson(file_name) + + Write all distance measures into a file. The possible formats are: + + - "dat" file: a list of "*i_res+1* distances" lines (distances are space + separated) + - "csv" file: a list of ";" separated distances (one line for each *i_res*) + - "json" file: a JSON formatted file (see :meth:`GetJsonString`) + + :param file_name: Path to file to be created. + :type file_name: :class:`str` + :raises: Exception if the file cannot be opened for writing. + + .. method:: GetJsonString() + + :returns: A JSON formatted list of :meth:`GetNumResidues` lists with + :meth:`GetNumStructures` distances. + :rtype: :class:`str` + + .. method:: GetData() + + Gets all the data in this map at once. Note that this is much faster (10x + speedup observed) than parsing :meth:`GetJsonString` or using :meth:`Get` + on each element. + + :returns: A list of :meth:`GetNumResidues` lists with + :meth:`GetNumStructures` distances. + :rtype: :class:`list` of :class:`list` of :class:`float` diff --git a/modules/seq/alg/pymod/__init__.py b/modules/seq/alg/pymod/__init__.py index 7eebf33878d565ecfcd09564daaf69f696c285f3..44fa50acf5584119845f501ed28ac3b580468cec 100644 --- a/modules/seq/alg/pymod/__init__.py +++ b/modules/seq/alg/pymod/__init__.py @@ -202,3 +202,60 @@ def PredictContacts(ali): mi.RefreshSortedIndices() return mi +def CalculateContactProbability(cpred_res,method): + """ + Calculate the probability of a predicted contact to be correct. + This simply transforms the score associated with a prediction into a probability. + + :param cpred_res: A contact prediction result + :param method: The method which was used for contact prediction. Should be one + of "MI", "MIp", "MIpz", "cevoMI", "cevo" + + :type cpred_res: :class:`ost.seq.alg.ContactPredictionScoreResult` + :type method: :class:`str` + """ + import math + def _growth_function(x,K,x0,B,nu,Q): + p=K/(1+Q*math.exp(-B*(x-x0)))**(1/nu) + p=min(1,max(0,p)) + return p + + def _decay_function(x,A,k): + p=A*math.exp(-k*x) + p=min(1,max(0,p)) + return p + + prob_params={} + prob_params["MI"]=[0.05858455345122933, 0.8930350957023122] + prob_params["MIp"]=[0.10019621004607637, 0.9065429261332942] + prob_params["MIpz"]=[0.7368147563063437, 4.638820470770171, 0.6383539191372934, 1.1627300209863376, 19.63060874042289] + prob_params["cevoMI"]=[0.779979757231944, 1.642357937131157, 0.3267847173036033, 0.3742848849873358, 5.1816922372446] + prob_params["cevo"]=[0.7564532665623617, 0.5947472274271304, 3.8548775389879166, 0.46203017320927053, 1.6198602780123705] + + if not method in prob_params: + raise ValueError("method should be one of MI, MIp, MIpz, cevoMI, cevo") + params=prob_params[method] + cpred_res.RefreshSortedIndices() + nres=len(cpred_res.matrix) + probabilities=[[0 for i in range(nres)] for j in range(nres)] + + if len(params)==2:func=_decay_function + else:func=_growth_function + + nres=float(nres) + if len(params)==2: + for k,(i,j) in enumerate(cpred_res.sorted_indices): + p=_decay_function(math.log10(0.01+k/nres),*params) + probabilities[i][j]=p + probabilities[j][i]=p + else: + for k,(i,j) in enumerate(cpred_res.sorted_indices): + p=_growth_function(cpred_res.GetScore(i,j),*params) + probabilities[i][j]=p + probabilities[j][i]=p + cpred_res.probabilities=probabilities + return cpred_res + + + + diff --git a/modules/seq/alg/pymod/renumber.py b/modules/seq/alg/pymod/renumber.py index 22873111e877c66da19ee2af80f49ce169ccd4ef..899ad1e1626816e412b9056379d7c883a361d3bb 100644 --- a/modules/seq/alg/pymod/renumber.py +++ b/modules/seq/alg/pymod/renumber.py @@ -1,6 +1,4 @@ -from ost import io, seq, mol, conop -from ost import * - +from ost import seq, mol def _RenumberSeq(seq_handle): if not seq_handle.HasAttachedView(): @@ -8,14 +6,13 @@ def _RenumberSeq(seq_handle): ev = seq_handle.attached_view.CreateEmptyView() new_numbers = mol.ResNumList() for pos in range(len(seq_handle)): - if seq_handle[pos]!='-': - r=seq_handle.GetResidue(pos) + if seq_handle[pos] != '-': + r = seq_handle.GetResidue(pos) if r.IsValid(): - #print seq_handle[pos],r.number.num,pos+1 ev.AddResidue(r, mol.INCLUDE_ALL) new_numbers.append(pos+1) else: - raise RuntimeError('Error: renumbering failed at position %s' %pos) + raise RuntimeError('Error: renumbering failed at position %s' % pos) return ev, new_numbers def _RenumberAln(aln, seq_index): @@ -25,44 +22,62 @@ def _RenumberAln(aln, seq_index): ev = aln.sequences[seq_index].attached_view.CreateEmptyView() new_numbers = mol.ResNumList() for col in aln: - if col[0]!='-' and col[seq_index]!='-': - if col[0]!=col[seq_index]: - raise RuntimeError("residue mismatch at position %d (%s vs %s) (renumbering failed)"%(counter, col[0],col[1])) - rnum=aln.GetSequence(seq_index).GetResidueIndex(counter) - r=aln.GetSequence(seq_index).GetResidue(counter) + if col[0] != '-' and col[seq_index] != '-': + if col[0] != col[seq_index]: + raise RuntimeError("residue mismatch at position %d (%s vs %s) "\ + "(renumbering failed)" % (counter, col[0], + col[seq_index])) + rnum = aln.GetSequence(seq_index).GetResidueIndex(counter) + r = aln.GetSequence(seq_index).GetResidue(counter) if not r.IsValid(): - raise RuntimeError("invalid residue at postion %s (renumbering failed)" %(counter)) + raise RuntimeError("invalid residue at postion %s (renumbering failed)"\ + % (counter)) ev.AddResidue(r, mol.INCLUDE_ALL) new_numbers.append(counter+1) - counter+=1 + counter += 1 return ev, new_numbers def Renumber(seq_handle, sequence_number_with_attached_view=1): """ - Function to renumber an entity according to an alignment between the model sequence - and the full-length target sequence. The aligned model sequence or the alignment itself - with an attached view needs to be provided. Upon succcess, the renumbered entity is returned. + Function to renumber an entity according to an alignment between the model + sequence and the full-length target sequence. The aligned model sequence or + the alignment itself with an attached view needs to be provided. Upon + succcess, the renumbered entity is returned. + If an alignment is given, the sequence must .. code-block:: python from ost.seq.alg import renumber from ost.bindings.clustalw import * - ent=io.LoadPDB("path_to_model") - s=io.LoadSequence("path_to_full_length_fasta_seqeunce") - pdb_seq=seq.SequenceFromChain("model", ent.chains[0]) - aln=ClustalW(s,pdb_seq) - aln.AttachView(1,ent.chains[0].Select("")) - e=Renumber(aln.GetSequence(sequence_number_with_attached_view)) + ent = io.LoadPDB("path_to_model") + s = io.LoadSequence("path_to_full_length_fasta_seqeunce") + pdb_seq = seq.SequenceFromChain("model", ent.chains[0]) + aln = ClustalW(s, pdb_seq) + aln.AttachView(1, ent.chains[0].Select("")) + e = Renumber(aln.sequences[1]) io.SavePDB(e, "renum.pdb") + :param seq_handle: Sequence or alignment handle with attached view. + :type seq_handle: :class:`SequenceHandle` / :class:`AlignmentHandle` + :param sequence_number_with_attached_view: Sequence number for the aln. handle + (not used if seq. handle given) + :type sequence_number_with_attached_view: :class:`int` + :raises: :exc:`RuntimeError` if unknown type of *seq_handle* or if attached + view is missing or if the given alignment sequence is inconsistent. """ - if isinstance(seq_handle, seq.SequenceHandle): + if isinstance(seq_handle, seq.SequenceHandle) \ + or isinstance(seq_handle, seq.ConstSequenceHandle): ev, new_numbers = _RenumberSeq(seq_handle) elif isinstance(seq_handle, seq.AlignmentHandle): ev, new_numbers = _RenumberAln(seq_handle, sequence_number_with_attached_view) + else: + raise RuntimeError("Unknown input type " + str(type(seq_handle))) ev.AddAllInclusiveBonds() - new_ent = mol.CreateEntityFromView(ev,False); + new_ent = mol.CreateEntityFromView(ev, False); new_ent.EditXCS().RenumberChain(new_ent.chains[0], new_numbers) return new_ent + +# choose visible interface +__all__ = ('Renumber', ) \ No newline at end of file diff --git a/modules/seq/alg/pymod/wrap_seq_alg.cc b/modules/seq/alg/pymod/wrap_seq_alg.cc index 50f84ab9cd18235608c2db8e3e941656aeb6b791..30909790c3c9b61944523e4f966d42d1ffb0a40d 100644 --- a/modules/seq/alg/pymod/wrap_seq_alg.cc +++ b/modules/seq/alg/pymod/wrap_seq_alg.cc @@ -1,7 +1,7 @@ //------------------------------------------------------------------------------ // This file is part of the OpenStructure project <www.openstructure.org> // -// Copyright (C) 2008-2011 by the OpenStructure authors +// Copyright (C) 2008-2016 by the OpenStructure authors // // This library is free software; you can redistribute it and/or modify it under // the terms of the GNU Lesser General Public License as published by the Free @@ -32,12 +32,56 @@ #include <ost/seq/alg/pair_subst_weight_matrix.hh> #include <ost/seq/alg/contact_weight_matrix.hh> #include <ost/seq/alg/contact_prediction_score.hh> +#include <ost/seq/alg/clip_alignment.hh> +#include <ost/seq/alg/distance_map.hh> +#include <ost/seq/alg/variance_map.hh> using namespace boost::python; using namespace ost::seq; using namespace ost::seq::alg; -BOOST_PYTHON_MODULE(_ost_seq_alg) +//////////////////////////////////////////////////////////////////// +// wrappers +namespace { +tuple WrapDistancesGetMin(const Distances& distance) { + const std::pair<Real, int> dist = distance.GetMin(); + return boost::python::make_tuple(dist.first, dist.second); +} +tuple WrapDistancesGetMax(const Distances& distance) { + const std::pair<Real, int> dist = distance.GetMax(); + return boost::python::make_tuple(dist.first, dist.second); +} +tuple WrapDistancesGetDataElement(const Distances& distance, uint index) { + const std::pair<Real, int> dist = distance.GetDataElement(index); + return boost::python::make_tuple(dist.first, dist.second); +} + +template <typename T> +list GetList(const T& data, uint num_rows, uint num_cols) { + list ret; + for (uint row = 0; row < num_rows; ++row) { + list my_row; + for (uint col = 0; col < num_cols; ++col) { + my_row.append(data(row, col)); + } + ret.append(my_row); + } + return ret; +} + +list VarMapGetData(const VarianceMapPtr v_map) { + return GetList(*v_map, v_map->GetSize(), v_map->GetSize()); +} + +list DistToMeanGetData(const Dist2MeanPtr d2m) { + return GetList(*d2m, d2m->GetNumResidues(), d2m->GetNumStructures()); +} +} // anon ns +//////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////// +// Work on alignments +void export_aln_alg() { enum_<RefMode::Type>("RefMode") .value("ALIGNMENT", RefMode::ALIGNMENT) @@ -71,7 +115,12 @@ BOOST_PYTHON_MODULE(_ost_seq_alg) def("SemiGlobalAlign", &SemiGlobalAlign,(arg("seq1"),arg("seq2"),arg("subst_weight"), arg("gap_open")=-5, arg("gap_ext")=-2)); def("ShannonEntropy", &ShannonEntropy, (arg("aln"), arg("ignore_gaps")=true)); +} +//////////////////////////////////////////////////////////////////// +// Contact Prediction +void export_contact_prediction() +{ class_<PairSubstWeightMatrix>("PairSubstWeightMatrix",init< std::vector <std::vector <std::vector <std::vector <Real> > > >,std::vector <char> >()) .def(init<>()) .def_readonly("weights",&PairSubstWeightMatrix::weights) @@ -99,7 +148,9 @@ BOOST_PYTHON_MODULE(_ost_seq_alg) def("LoadConstantContactWeightMatrix",LoadConstantContactWeightMatrix); def("LoadDefaultPairSubstWeightMatrix",LoadDefaultPairSubstWeightMatrix); - scope mat_scope = class_<SubstWeightMatrix, SubstWeightMatrixPtr>("SubstWeightMatrix", init<>()) + // NOTE: anything after this is within SubstWeightMatrix scope + scope mat_scope = class_<SubstWeightMatrix, SubstWeightMatrixPtr> + ("SubstWeightMatrix", init<>()) .def("GetWeight", &SubstWeightMatrix::GetWeight) .def("SetWeight", &SubstWeightMatrix::SetWeight) .def("GetMinWeight", &SubstWeightMatrix::GetMinWeight) @@ -113,5 +164,68 @@ BOOST_PYTHON_MODULE(_ost_seq_alg) .value("BLOSUM80", SubstWeightMatrix::BLOSUM80) .value("BLOSUM100", SubstWeightMatrix::BLOSUM100) ; +} +//////////////////////////////////////////////////////////////////// +// getting/analyzing distance matrices from alignments +void export_distance_analysis() +{ + def("ClipAlignment", &ClipAlignment, (arg("aln"), arg("n_seq_thresh")=2, + arg("set_offset")=true, arg("remove_empty")=true)); + def("CreateDistanceMap", &CreateDistanceMap, (arg("aln"))); + def("CreateVarianceMap", &CreateVarianceMap, (arg("d_map"), arg("sigma")=25)); + def("CreateDist2Mean", &CreateDist2Mean, (arg("d_map"))); + + class_<Distances>("Distances", no_init) + .def("GetDataSize", &Distances::GetDataSize) + .def("GetAverage", &Distances::GetAverage) + .def("GetMin", &WrapDistancesGetMin) + .def("GetMax", &WrapDistancesGetMax) + .def("GetDataElement", &WrapDistancesGetDataElement, (arg("index"))) + .def("GetStdDev", &Distances::GetStdDev) + .def("GetWeightedStdDev", &Distances::GetWeightedStdDev, (arg("sigma"))) + .def("GetNormStdDev", &Distances::GetNormStdDev) + ; + + class_<DistanceMap, DistanceMapPtr, + boost::noncopyable>("DistanceMap", no_init) + .def("GetDistances", &DistanceMap::GetDistances, + return_value_policy<reference_existing_object>(), + (arg("i_res1"), arg("i_res2"))) + .def("GetSize", &DistanceMap::GetSize) + .def("GetNumStructures", &DistanceMap::GetNumStructures) + ; + + class_<VarianceMap, VarianceMapPtr, + boost::noncopyable>("VarianceMap", no_init) + .def("Get", &VarianceMap::Get, (arg("i_res1"), arg("i_res2")), + return_value_policy<return_by_value>()) + .def("GetSize", &VarianceMap::GetSize) + .def("Min", &VarianceMap::Min) + .def("Max", &VarianceMap::Max) + .def("ExportDat", &VarianceMap::ExportDat, (arg("file_name"))) + .def("ExportCsv", &VarianceMap::ExportCsv, (arg("file_name"))) + .def("ExportJson", &VarianceMap::ExportJson, (arg("file_name"))) + .def("GetJsonString", &VarianceMap::GetJsonString) + .def("GetData", &VarMapGetData) + ; + + class_<Dist2Mean, Dist2MeanPtr, + boost::noncopyable>("Dist2Mean", no_init) + .def("Get", &Dist2Mean::Get, (arg("i_res"), arg("i_str"))) + .def("GetNumResidues", &Dist2Mean::GetNumResidues) + .def("GetNumStructures", &Dist2Mean::GetNumStructures) + .def("ExportDat", &Dist2Mean::ExportDat, (arg("file_name"))) + .def("ExportCsv", &Dist2Mean::ExportCsv, (arg("file_name"))) + .def("ExportJson", &Dist2Mean::ExportJson, (arg("file_name"))) + .def("GetJsonString", &Dist2Mean::GetJsonString) + .def("GetData", &DistToMeanGetData) + ; +} + +BOOST_PYTHON_MODULE(_ost_seq_alg) +{ + export_aln_alg(); + export_contact_prediction(); + export_distance_analysis(); } diff --git a/modules/seq/alg/src/CMakeLists.txt b/modules/seq/alg/src/CMakeLists.txt index 7e969a6405fded0d522879d06a99ca86b2fb29b6..ea32bbd6cb8c98977e6d0933f132edc7a33d93c7 100644 --- a/modules/seq/alg/src/CMakeLists.txt +++ b/modules/seq/alg/src/CMakeLists.txt @@ -1,40 +1,44 @@ set(OST_SEQ_ALG_HEADERS -sequence_identity.hh -sequence_similarity.hh -module_config.hh -ins_del.hh -subst_weight_matrix.hh alignment_opts.hh -entropy.hh -merge_pairwise_alignments.hh +clip_alignment.hh conservation.hh -local_align.hh +contact_prediction_score.hh +contact_weight_matrix.hh +distance_map.hh +entropy.hh global_align.hh -semiglobal_align.hh +ins_del.hh +local_align.hh +merge_pairwise_alignments.hh +module_config.hh pair_subst_weight_matrix.hh -contact_weight_matrix.hh -contact_prediction_score.hh -data/default_pair_subst_weight_matrix.hh +semiglobal_align.hh +sequence_identity.hh +sequence_similarity.hh +subst_weight_matrix.hh +variance_map.hh data/default_contact_weight_matrix.hh +data/default_pair_subst_weight_matrix.hh ) set(OST_SEQ_ALG_SOURCES -merge_pairwise_alignments.cc -local_align.cc +clip_alignment.cc +conservation.cc +contact_prediction_score.cc +contact_weight_matrix.cc +distance_map.cc +entropy.cc global_align.cc +ins_del.cc +local_align.cc +merge_pairwise_alignments.cc +pair_subst_weight_matrix.cc semiglobal_align.cc -entropy.cc sequence_identity.cc sequence_similarity.cc -ins_del.cc -conservation.cc -contact_weight_matrix.cc subst_weight_matrix.cc -pair_subst_weight_matrix.cc -contact_prediction_score.cc +variance_map.cc ) module(NAME seq_alg HEADER_OUTPUT_DIR ost/seq/alg SOURCES ${OST_SEQ_ALG_SOURCES} HEADERS ${OST_SEQ_ALG_HEADERS} DEPENDS_ON ost_seq) - - diff --git a/modules/seq/alg/src/clip_alignment.cc b/modules/seq/alg/src/clip_alignment.cc new file mode 100644 index 0000000000000000000000000000000000000000..02ff9c14b572ff46f0386ae57ce9adb3f81a2d9a --- /dev/null +++ b/modules/seq/alg/src/clip_alignment.cc @@ -0,0 +1,99 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2016 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +/* + Author: Gerardo Tauriello, Juergen Haas + */ + +#include <vector> +#include <ost/seq/alg/clip_alignment.hh> + +namespace ost { namespace seq { namespace alg { + +int ClipAlignment(AlignmentHandle& aln, uint n_seq_thresh, + bool set_offset, bool remove_empty) { + // count entries per column + const int num_residues = aln.GetLength(); + const int num_seqs = aln.GetCount(); + std::vector<uint> counts(num_residues, 0); + // parse all sequences + for (int i_s = 0; i_s < num_seqs; ++i_s) { + String s = aln.GetSequence(i_s).GetString(); + // only consider non-gaps + const size_t from = s.find_first_not_of("-"); + if (from == String::npos) continue; + const size_t last = s.find_last_not_of("-"); + for (size_t i = from; i <= last; ++i) { + if (s[i] != '-') ++counts[i]; + } + } + + // find clip_start and clip_end + int clip_start = 0; + while (clip_start < num_residues && counts[clip_start] < n_seq_thresh) { + ++clip_start; + } + // check if we clipped away everything -> ABORT + if (clip_start == num_residues) { + if (remove_empty) aln = seq::CreateAlignment(); + return -1; + } + int clip_end = num_residues - 1; + while (clip_end > clip_start && counts[clip_end] < n_seq_thresh) { + --clip_end; + } + + // update offsets + if (set_offset) { + for (int i_s = 0; i_s < num_seqs; ++i_s) { + // only relevant if a view attached... + if (!aln.GetSequence(i_s).HasAttachedView()) continue; + // count non-gapped part to be cut + String s = aln.GetSequence(i_s).GetString(); + const size_t from = s.find_first_not_of("-"); + if (from == String::npos) continue; + uint cut_res = 0; + for (int i = from; i < clip_start; ++i) { + if (s[i] != '-') ++cut_res; + } + // update offset + aln.SetSequenceOffset(i_s, aln.GetSequenceOffset(i_s) + cut_res); + } + } + + // cut alignment + if (clip_end < num_residues - 1) aln.Cut(clip_end + 1, num_residues); + if (clip_start > 0) aln.Cut(0, clip_start); + + // remove empty sequences + if (remove_empty) { + int i_s = 0; + while (i_s < aln.GetCount()) { + if (aln.GetSequence(i_s).GetFirstNonGap() == -1) { + aln.RemoveSequence(i_s); + } else { + ++i_s; + } + } + } + + return clip_start; +} + +}}} diff --git a/modules/seq/alg/src/clip_alignment.hh b/modules/seq/alg/src/clip_alignment.hh new file mode 100644 index 0000000000000000000000000000000000000000..7af6abf5bd04bd2f33f9721b9e1edf9b0e81e272 --- /dev/null +++ b/modules/seq/alg/src/clip_alignment.hh @@ -0,0 +1,47 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2016 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +/* + Author: Gerardo Tauriello, Juergen Haas + */ + +#ifndef OST_SEQ_ALG_CLIP_ALIGNMENT_HH +#define OST_SEQ_ALG_CLIP_ALIGNMENT_HH + +#include <ost/seq/alignment_handle.hh> +#include <ost/seq/alg/module_config.hh> + +namespace ost { namespace seq { namespace alg { + +/// \brief Clips alignment so that first and last column have at least the +/// desired number of structures. +/// +/// \param aln Multiple sequence alignment. Will be cut. +/// \param n_seq_thresh Minimal number of sequences desired. +/// \param set_offset Shall we update offsets for attached views? +/// \param remove_empty Shall we remove sequences with only gaps in cut aln? +/// \returns Starting column (0-indexed), where cut region starts (w.r.t. +/// original aln). -1, if there is no region in the alignment with +/// at least the desired number of structures. +int DLLEXPORT_OST_SEQ_ALG +ClipAlignment(AlignmentHandle& aln, uint n_seq_thresh=2, bool set_offset=true, + bool remove_empty=true); +}}} + +#endif diff --git a/modules/seq/alg/src/distance_map.cc b/modules/seq/alg/src/distance_map.cc new file mode 100644 index 0000000000000000000000000000000000000000..57a1729135147b1707c47221e67d3768a304064e --- /dev/null +++ b/modules/seq/alg/src/distance_map.cc @@ -0,0 +1,75 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2016 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +/* + Author: Marco Biasini, Juergen Haas, Gerardo Tauriello + */ + +#include <ost/mol/mol.hh> +#include <ost/seq/alg/distance_map.hh> + +namespace ost { namespace seq { namespace alg { + +DistanceMapPtr CreateDistanceMap(const seq::AlignmentHandle& alignment) { + + // setup and check + if (alignment.GetCount() <= 1) { + throw IntegrityError("At least two sequences required!"); + } + seq::ConstSequenceHandle ref_seq = alignment.GetSequence(0); + String ref_seq_str = ref_seq.GetString(); + uint len = ref_seq.GetGaplessString().size(); + DistanceMapPtr dist_map(new DistanceMap(len, alignment.GetCount() - 1)); + + // parse all non-ref sequences + for (int j = 1; j < alignment.GetCount(); ++j) { + seq::ConstSequenceHandle s = alignment.GetSequence(j); + if (!s.HasAttachedView()) { + throw IntegrityError("All non-ref sequences must have an attached view!"); + } + // parse all residues in ref seq. + for (uint x = 0; x < ref_seq_str.size(); ++x) { + // skip gaps + if (ref_seq_str[x] == '-') continue; + // find CA and skip gaps and weird residues + mol::ResidueView res_a = s.GetResidue(x); + if (!res_a) continue; + mol::AtomView ca_a = res_a.FindAtom("CA"); + if (!ca_a) continue; + // compare with all others (considering symmetry) + for (uint y = x+1; y < ref_seq_str.size(); ++y) { + // skip gaps + if (ref_seq_str[y]=='-') continue; + // find CA and skip gaps and weird residues + mol::ResidueView res_b = s.GetResidue(y); + if (!res_b) continue; + mol::AtomView ca_b = res_b.FindAtom("CA"); + if (!ca_b) continue; + // add it to the map + dist_map->AddDistance(ref_seq.GetResidueIndex(x), + ref_seq.GetResidueIndex(y), + geom::Distance(ca_a.GetPos(), ca_b.GetPos()), + j); + } + } + } + return dist_map; +} + +}}} diff --git a/modules/seq/alg/src/distance_map.hh b/modules/seq/alg/src/distance_map.hh new file mode 100644 index 0000000000000000000000000000000000000000..faf1f553ccfee616d58d40a760234a5dc8f8dd63 --- /dev/null +++ b/modules/seq/alg/src/distance_map.hh @@ -0,0 +1,187 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2016 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +/* + Author: Marco Biasini, Juergen Haas, Gerardo Tauriello + */ + +#ifndef OST_SEQ_ALG_DISTANCE_MAP_HH +#define OST_SEQ_ALG_DISTANCE_MAP_HH + +#include <numeric> +#include <cmath> +#include <algorithm> + +#include <ost/tri_matrix.hh> +#include <ost/integrity_error.hh> +#include <ost/seq/alignment_handle.hh> +#include <ost/seq/alg/module_config.hh> + +namespace ost { namespace seq { namespace alg { + +/////////////////////////////////////////////////////////////////////////////// +// HELPERS +namespace { + +template <typename Pair> +struct sum_first { + typename Pair::first_type operator()(const typename Pair::first_type& x, + const Pair& y) const + { + return x + y.first; + } +}; + +template <typename Pair> +struct CompareFirst { + bool operator()(const Pair& x, const Pair& y) const { + return (x.first < y.first); + } +}; + +} // anon ns +/////////////////////////////////////////////////////////////////////////////// + +/// \brief Container for a pair wise distance for each structure. +/// Each structure is identified by its index in the originally used alignment. +struct DLLEXPORT_OST_SEQ_ALG Distances { + + typedef std::vector<std::pair<Real,int> > FloatArray; + + uint GetDataSize() const { return values_.size(); } + + void Add(Real dist, int index) { + values_.push_back(std::make_pair(dist,index)); + } + + Real GetAverage() const { + if (values_.empty()) throw IntegrityError("List of distances empty!"); + Real sum = std::accumulate(values_.begin(), values_.end(), 0.0, + sum_first<FloatArray::value_type>()); + return sum/values_.size(); + } + + std::pair<Real,int> GetMin() const { + if (values_.empty()) throw IntegrityError("List of distances empty!"); + return *std::min_element(values_.begin(), values_.end(), + CompareFirst<FloatArray::value_type>()); + } + + std::pair<Real,int> GetMax() const { + if (values_.empty()) throw IntegrityError("List of distances empty!"); + return *std::max_element(values_.begin(), values_.end(), + CompareFirst<FloatArray::value_type>()); + } + + std::pair<Real,int> GetDataElement(uint index) const { + if (values_.size() != 0) { + if (values_.size() > index) { + return values_[index]; + } else { + std::stringstream error_message; + error_message << "size of distances data vector: " << values_.size() + << " is smaller than requested: " << index+1; + throw IntegrityError(error_message.str()); + } + } else { + throw IntegrityError("List of distances empty!"); + } + } + + Real GetStdDev() const { + if (values_.empty()) throw IntegrityError("List of distances empty!"); + const Real avg = this->GetAverage(); + return this->do_get_stddev(avg); + } + + Real GetWeightedStdDev(Real sigma) const { + if (values_.empty()) throw IntegrityError("List of distances empty!"); + const Real avg = this->GetAverage(); + const Real weight = std::exp(avg/(-2*sigma)); + return this->do_get_stddev(avg) * weight; + } + + Real GetNormStdDev() const { + if (values_.empty()) throw IntegrityError("List of distances empty!"); + const Real avg = this->GetAverage(); + return this->do_get_stddev(avg) / avg; + } + + bool operator==(const Distances& rhs) const { return values_ == rhs.values_; } + bool operator!=(const Distances& rhs) const { return values_ != rhs.values_; } + +private: + + Real do_get_stddev(Real avg) const { + Real var = 0.0; + for (FloatArray::const_iterator i = values_.begin(), + e = values_.end(); i != e; ++i) { + const Real diff = i->first - avg; + var += diff*diff; + } + return std::sqrt(var/values_.size()); + } + + FloatArray values_; +}; + +/// \brief Container for pair wise distances of a set of structures. +/// Essentially a symmetric GetSize x GetSize matrix containing up to +/// GetNumStructures distances (see Distances). +class DistanceMap; +typedef boost::shared_ptr<DistanceMap> DistanceMapPtr; + +class DLLEXPORT_OST_SEQ_ALG DistanceMap { +public: + DistanceMap(uint nresidues, uint num_structures) + : dist_(nresidues), num_structures_(num_structures) { } + + void AddDistance(uint i, uint j, Real distance, int index) { + dist_(i ,j).Add(distance,index); + } + + const Distances& GetDistances(uint i, uint j) const { + return dist_(i, j); + } + + uint GetSize() const { return dist_.GetSize(); } + + uint GetNumStructures() const { return num_structures_; } + +private: + TriMatrix<Distances> dist_; + uint num_structures_; +}; + + +/// \brief create distance map from a multiple sequence alignment. +/// +/// The algorithm requires that the sequence alignment consists of at least two +/// sequences. The sequence at index 0 serves as a frame of reference. All the +/// other sequences must have an attached view and a properly set +/// \ref seq::SequenceHandle::SetSequenceOffset() "sequence offset". +/// +/// For each of the attached views, the C-alpha distance pairs are extracted and +/// mapped onto the corresponding C-alpha distances in the reference sequence. +DistanceMapPtr DLLEXPORT_OST_SEQ_ALG +CreateDistanceMap(const seq::AlignmentHandle& alignment); + +}}} + +#endif diff --git a/modules/seq/alg/src/variance_map.cc b/modules/seq/alg/src/variance_map.cc new file mode 100644 index 0000000000000000000000000000000000000000..c3c214f836f124096bd4ddc2a08ffafe4a575b35 --- /dev/null +++ b/modules/seq/alg/src/variance_map.cc @@ -0,0 +1,196 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2016 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +/* + Author: Gerardo Tauriello, Juergen Haas + */ + +#include <fstream> +#include <iostream> +#include <sstream> + +#include <ost/mol/mol.hh> + +#include <ost/seq/alg/distance_map.hh> +#include <ost/seq/alg/variance_map.hh> + +namespace ost { namespace seq { namespace alg { + +/////////////////////////////////////////////////////////////////////////////// +// HELPERS +namespace { +// dump stuff using () operator +template <typename T> +void DumpCsv(const String& file_name, const T& data, uint num_rows, + uint num_cols) { + // open file + std::ofstream out_file(file_name.c_str()); + if (!out_file) throw Error("The file '" + file_name + "' cannot be opened."); + // dump each row + for (uint row = 0; row < num_rows; ++row) { + for (uint col = 0; col < num_cols; ++col) { + out_file << data(row, col); + if (col < num_cols-1) out_file << ";"; + } + out_file << std::endl; + } + out_file.close(); +} + +template <typename T> +String GetJson(const T& data, uint num_rows, uint num_cols) { + std::ostringstream out_stream; + out_stream << "["; + for (uint row = 0; row < num_rows; ++row) { + out_stream << "["; + for (uint col = 0; col < num_cols; ++col) { + out_stream << data(row, col); + if (col < num_cols-1) out_stream << ","; + } + out_stream << "]"; + if (row < num_rows-1) out_stream << ","; + } + out_stream << "]"; + return out_stream.str(); +} +} // anon ns + +/////////////////////////////////////////////////////////////////////////////// +// IO +void VarianceMap::ExportDat(const String& file_name) { + uint len = this->GetSize(); + if (len == 0) throw IntegrityError("Matrix empty"); + // dump it + std::ofstream out_file(file_name.c_str()); + if (!out_file) throw Error("The file '" + file_name + "' cannot be opened."); + for (uint rows = 0; rows < len; ++rows) { + for (uint cols = 0; cols < len; ++cols) { + out_file << (rows+1) << " " << (cols+1) << " " << this->Get(rows, cols) + << std::endl; + } + } + out_file.close(); +} + +void VarianceMap::ExportCsv(const String& file_name) { + uint len = this->GetSize(); + if (len == 0) throw IntegrityError("Matrix empty"); + DumpCsv(file_name, *this, len, len); +} + +void VarianceMap::ExportJson(const String& file_name) { + if (this->GetSize() == 0) throw IntegrityError("Matrix empty"); + std::ofstream out_file(file_name.c_str()); + if (!out_file) throw Error("The file '" + file_name + "' cannot be opened."); + out_file << this->GetJsonString() << std::endl; + out_file.close(); +} + +String VarianceMap::GetJsonString() { + uint len = this->GetSize(); + if (len == 0) throw IntegrityError("Matrix empty"); + return GetJson(*this, len, len); +} + +void Dist2Mean::ExportDat(const String& file_name) { + if (values_.size() == 0) throw IntegrityError("Matrix empty"); + // dump it + std::ofstream out_file(file_name.c_str()); + if (!out_file) throw Error("The file '" + file_name + "' cannot be opened."); + for (uint i_res = 0; i_res < num_residues_; ++i_res) { + out_file << (i_res+1); + for (uint i_str = 0; i_str < num_structures_; ++i_str) { + out_file << " " << this->Get(i_res, i_str); + } + out_file << std::endl; + } + out_file.close(); +} + +void Dist2Mean::ExportCsv(const String& file_name) { + if (values_.size() == 0) throw IntegrityError("Matrix empty"); + DumpCsv(file_name, *this, num_residues_, num_structures_); +} + +void Dist2Mean::ExportJson(const String& file_name) { + if (values_.size() == 0) throw IntegrityError("Matrix empty"); + std::ofstream out_file(file_name.c_str()); + if (!out_file) throw Error("The file '" + file_name + "' cannot be opened."); + out_file << this->GetJsonString() << std::endl; + out_file.close(); +} + +String Dist2Mean::GetJsonString() { + if (values_.size() == 0) throw IntegrityError("Matrix empty"); + return GetJson(*this, num_residues_, num_structures_); +} + +/////////////////////////////////////////////////////////////////////////////// +// Create maps +VarianceMapPtr CreateVarianceMap(const DistanceMapPtr dmap, Real sigma) { + // setup + uint len = dmap->GetSize(); + if (len == 0) { + throw IntegrityError("empty distance map provided"); + } + // get weighted std for each pair (symmetric storage!) + VarianceMapPtr vmap(new VarianceMap(len)); + for (uint i_res = 0; i_res < len; ++i_res) { + for (uint i_res2 = 0; i_res2 <= i_res; ++i_res2) { + const Distances& di = dmap->GetDistances(i_res, i_res2); + if (di.GetDataSize() > 0) { + vmap->Set(i_res, i_res2, di.GetWeightedStdDev(sigma)); + } else { + vmap->Set(i_res, i_res2, 0); + } + } + } + return vmap; +} + +Dist2MeanPtr CreateDist2Mean(const DistanceMapPtr dmap) { + // setup/check + uint nstruc = dmap->GetNumStructures(); + uint len = dmap->GetSize(); + if (len == 0 || nstruc == 0) { + throw IntegrityError("empty distance map provided"); + } + // sum up all distances to mean for each residue (symmetric dmap!) + Dist2MeanPtr dist2mean(new Dist2Mean(len, nstruc)); + for (uint i_res = 0; i_res < len; ++i_res) { + for (uint i_res2 = 0; i_res2 <= i_res; ++i_res2) { + const Distances& di = dmap->GetDistances(i_res, i_res2); + const uint size = di.GetDataSize(); + if (size >= 2) { + const Real avg = di.GetAverage(); + for (uint h = 0; h < size; ++h) { + const std::pair<Real, int> ret = di.GetDataElement(h); + const Real val = std::abs(ret.first - avg); + dist2mean->Add(i_res, ret.second-1, val); + if (i_res != i_res2) dist2mean->Add(i_res2, ret.second-1, val); + } + } + } + } + // normalize + dist2mean->DivideBy(len); + return dist2mean; +} + +}}} diff --git a/modules/seq/alg/src/variance_map.hh b/modules/seq/alg/src/variance_map.hh new file mode 100644 index 0000000000000000000000000000000000000000..0c7e02824241a7cee6becbc580cc408b6bf41747 --- /dev/null +++ b/modules/seq/alg/src/variance_map.hh @@ -0,0 +1,137 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2016 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +/* + Author: Gerardo Tauriello, Juergen Haas + */ + +#ifndef OST_SEQ_ALG_VARIANCE_MAP_HH +#define OST_SEQ_ALG_VARIANCE_MAP_HH + +#include <algorithm> + +#include <ost/seq/alignment_handle.hh> +#include <ost/tri_matrix.hh> +#include <ost/integrity_error.hh> +#include <ost/seq/alg/module_config.hh> +#include <ost/seq/alg/distance_map.hh> + +namespace ost { namespace seq { namespace alg { + +class VarianceMap; +class Dist2Mean; +typedef boost::shared_ptr<VarianceMap> VarianceMapPtr; +typedef boost::shared_ptr<Dist2Mean> Dist2MeanPtr; + +/// \brief Container for variances for each entry in a distance map. +/// Main functionality: Get/Set, Min, Max, ExportXXX +/// Get/Set and GetSize taken from TriMatrix. +class DLLEXPORT_OST_SEQ_ALG VarianceMap: public TriMatrix<Real> { +public: + // all values initialized to 0 in constructor! + VarianceMap(int nresidues): TriMatrix<Real>(nresidues, 0) { } + + Real Min() { + if (this->GetSize() == 0) throw IntegrityError("Matrix empty"); + std::vector<Real>& data = this->Data(); + return *std::min_element(data.begin(), data.end()); + } + + Real Max() { + if (this->GetSize() == 0) throw IntegrityError("Matrix empty"); + std::vector<Real>& data = this->Data(); + return *std::max_element(data.begin(), data.end()); + } + + void ExportDat(const String& file_name); + void ExportCsv(const String& file_name); + void ExportJson(const String& file_name); + String GetJsonString(); +}; + +/// \brief Container for distances to mean for N structures. +/// Main functionality: Get/Set, ExportXXX +class DLLEXPORT_OST_SEQ_ALG Dist2Mean { +public: + // all values initialized to 0 in constructor! + Dist2Mean(uint num_residues, uint num_structures) + : num_residues_(num_residues), num_structures_(num_structures) + , values_(num_residues * num_structures, 0) { } + + void Set(uint i_res, uint i_str, Real val) { + values_[GetIndex(i_res, i_str)] = val; + } + + Real Get(uint i_res, uint i_str) const { + return values_[GetIndex(i_res, i_str)]; + } + + Real& operator()(uint i_res, uint i_str) { + return values_[GetIndex(i_res, i_str)]; + } + Real operator()(uint i_res, uint i_str) const { + return values_[GetIndex(i_res, i_str)]; + } + + std::vector<Real>& Data() { return values_; } + + uint GetNumResidues() const { return num_residues_; } + uint GetNumStructures() const { return num_structures_; } + + void Add(uint i_res, uint i_str, Real val) { + values_[GetIndex(i_res, i_str)] += val; + } + + void DivideBy(Real val) { + for (uint i = 0; i < values_.size(); ++i) values_[i] /= val; + } + + void ExportDat(const String& file_name); + void ExportCsv(const String& file_name); + void ExportJson(const String& file_name); + String GetJsonString(); + +private: + uint GetIndex(uint i_res, uint i_str) const { + assert(i_res < num_residues_); + assert(i_str < num_structures_); + return (i_res * num_structures_ + i_str); + } + + uint num_residues_; + uint num_structures_; + std::vector<Real> values_; +}; + +/// \returns Variance measure for each entry in dmap. +/// \param dmap Distance map as created with CreateDistanceMap. +/// \param sigma Used for weighting of variance measure +/// (see Distances::GetWeightedStdDev) +VarianceMapPtr DLLEXPORT_OST_SEQ_ALG +CreateVarianceMap(const DistanceMapPtr dmap, Real sigma=25); + +/// \returns Distances to mean for each structure in dmap. +/// Structures are in the same order as passed when creating dmap. +/// \param dmap Distance map as created with CreateDistanceMap. +Dist2MeanPtr DLLEXPORT_OST_SEQ_ALG +CreateDist2Mean(const DistanceMapPtr dmap); + +}}} + +#endif diff --git a/modules/seq/alg/tests/CMakeLists.txt b/modules/seq/alg/tests/CMakeLists.txt index ca0e72988f5fb0e4e8e7841a405e7f1303541cd5..529a3eb62e02f3273258680064670f4e9ceb8850 100644 --- a/modules/seq/alg/tests/CMakeLists.txt +++ b/modules/seq/alg/tests/CMakeLists.txt @@ -1,4 +1,5 @@ set(OST_SEQ_ALG_UNIT_TESTS + test_distance_analysis.cc test_merge_pairwise_alignments.cc test_sequence_identity.cc tests.cc @@ -10,5 +11,6 @@ set(OST_SEQ_ALG_UNIT_TESTS test_aligntoseqres.py ) -ost_unittest(MODULE seq_alg SOURCES "${OST_SEQ_ALG_UNIT_TESTS}") +ost_unittest(MODULE seq_alg SOURCES "${OST_SEQ_ALG_UNIT_TESTS}" + LINK ost_mol ost_io) diff --git a/modules/seq/alg/tests/test_distance_analysis.cc b/modules/seq/alg/tests/test_distance_analysis.cc new file mode 100644 index 0000000000000000000000000000000000000000..e8b41e819329c2c6adcf60e6f241154bc740f186 --- /dev/null +++ b/modules/seq/alg/tests/test_distance_analysis.cc @@ -0,0 +1,269 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2016 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +/* + Author: Gerardo Tauriello + */ + +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> +#include <boost/test/auto_unit_test.hpp> + +#include <ost/mol/atom_view.hh> +#include <ost/mol/xcs_editor.hh> +#include <ost/io/mol/load_entity.hh> +#include <ost/seq/alg/clip_alignment.hh> +#include <ost/seq/alg/distance_map.hh> +#include <ost/seq/alg/variance_map.hh> +#include <ost/integrity_error.hh> + +using namespace ost; +using namespace ost::seq; + +BOOST_AUTO_TEST_SUITE(ost_seq_alg); + +// HELPERS +AlignmentHandle GetClippingAln() { + AlignmentHandle aln = CreateAlignment(); + aln.AddSequence(CreateSequence("ref", "DDFAGTHN")); + aln.AddSequence(CreateSequence("A", "-DFAG---")); + aln.AddSequence(CreateSequence("B", "-----THN")); + aln.AddSequence(CreateSequence("C", "--FAG---")); + aln.AddSequence(CreateSequence("D", "DDFAG---")); + // attach view + mol::EntityHandle ent = io::LoadEntity("testfiles/dummy.pdb"); + for (int i_s = 1; i_s < aln.GetCount(); ++i_s) { + aln.AttachView(i_s, ent.CreateFullView()); + aln.SetSequenceOffset(i_s, aln.GetSequence(i_s).GetFirstNonGap()); + } + return aln; +} + +AlignmentHandle GetDistanceMapAln() { + // prepare alignment + AlignmentHandle aln = CreateAlignment(); + aln.AddSequence(CreateSequence("ref", "DDFAGTHN")); + aln.AddSequence(CreateSequence("A", "DDFAGT--")); + aln.AddSequence(CreateSequence("B", "--FAGTH-")); + // attach views + mol::EntityHandle ent = io::LoadEntity("testfiles/dummy.pdb"); + // move CA of res. at index 3 (res. num = 4) for ent + mol::AtomHandle atom = ent.FindAtom("A", 4, "CA"); + ent.EditXCS(mol::UNBUFFERED_EDIT).SetAtomPos(atom, geom::Vec3(0,0,0)); + BOOST_CHECK_EQUAL(ent.FindAtom("A", 4, "CA").GetPos(), geom::Vec3(0,0,0)); + mol::EntityHandle ent2 = io::LoadEntity("testfiles/dummy.pdb"); + aln.AttachView(1, ent.CreateFullView()); + aln.AttachView(2, ent2.CreateFullView()); + aln.SetSequenceOffset(2, 2); + return aln; +} + +alg::DistanceMapPtr GetDistanceMap() { + AlignmentHandle aln = GetDistanceMapAln(); + return alg::CreateDistanceMap(aln); +} + +// TESTS +BOOST_AUTO_TEST_CASE(test_clip_alignment_no_change) { + // clip it at level 2 (no change!) + AlignmentHandle aln_cut = GetClippingAln(); + int clip_start = alg::ClipAlignment(aln_cut, 2); + BOOST_CHECK_EQUAL(clip_start, 0); + BOOST_CHECK_EQUAL(aln_cut.GetLength(), 8); + BOOST_CHECK_EQUAL(aln_cut.GetCount(), 5); + BOOST_CHECK_EQUAL(aln_cut.GetSequence(0).GetString(), "DDFAGTHN"); + BOOST_CHECK_EQUAL(aln_cut.GetSequence(1).GetString(), "-DFAG---"); + BOOST_CHECK_EQUAL(aln_cut.GetSequence(2).GetString(), "-----THN"); + BOOST_CHECK_EQUAL(aln_cut.GetSequence(3).GetString(), "--FAG---"); + BOOST_CHECK_EQUAL(aln_cut.GetSequence(4).GetString(), "DDFAG---"); + BOOST_CHECK_EQUAL(aln_cut.GetSequenceOffset(0), 0); + BOOST_CHECK_EQUAL(aln_cut.GetSequenceOffset(1), 1); + BOOST_CHECK_EQUAL(aln_cut.GetSequenceOffset(2), 5); + BOOST_CHECK_EQUAL(aln_cut.GetSequenceOffset(3), 2); + BOOST_CHECK_EQUAL(aln_cut.GetSequenceOffset(4), 0); +} + +BOOST_AUTO_TEST_CASE(test_clip_alignment_no_offset) { + // clip it at level 3 without updating offsets and deleting + AlignmentHandle aln_cut = GetClippingAln(); + int clip_start = alg::ClipAlignment(aln_cut, 3, false, false); + BOOST_CHECK_EQUAL(clip_start, 1); + BOOST_CHECK_EQUAL(aln_cut.GetLength(), 4); + BOOST_CHECK_EQUAL(aln_cut.GetCount(), 5); + BOOST_CHECK_EQUAL(aln_cut.GetSequence(0).GetString(), "DFAG"); + BOOST_CHECK_EQUAL(aln_cut.GetSequence(1).GetString(), "DFAG"); + BOOST_CHECK_EQUAL(aln_cut.GetSequence(2).GetString(), "----"); + BOOST_CHECK_EQUAL(aln_cut.GetSequence(3).GetString(), "-FAG"); + BOOST_CHECK_EQUAL(aln_cut.GetSequence(4).GetString(), "DFAG"); + BOOST_CHECK_EQUAL(aln_cut.GetSequenceOffset(0), 0); + BOOST_CHECK_EQUAL(aln_cut.GetSequenceOffset(1), 1); + BOOST_CHECK_EQUAL(aln_cut.GetSequenceOffset(2), 5); + BOOST_CHECK_EQUAL(aln_cut.GetSequenceOffset(3), 2); + BOOST_CHECK_EQUAL(aln_cut.GetSequenceOffset(4), 0); +} + +BOOST_AUTO_TEST_CASE(test_clip_alignment_def) { + // clip it at level 3 with updating offsets and deleting + AlignmentHandle aln_cut = GetClippingAln(); + int clip_start = alg::ClipAlignment(aln_cut, 3, true, true); + BOOST_CHECK_EQUAL(clip_start, 1); + BOOST_CHECK_EQUAL(aln_cut.GetLength(), 4); + BOOST_CHECK_EQUAL(aln_cut.GetCount(), 4); + BOOST_CHECK_EQUAL(aln_cut.GetSequence(0).GetString(), "DFAG"); + BOOST_CHECK_EQUAL(aln_cut.GetSequence(1).GetString(), "DFAG"); + BOOST_CHECK_EQUAL(aln_cut.GetSequence(2).GetString(), "-FAG"); + BOOST_CHECK_EQUAL(aln_cut.GetSequence(3).GetString(), "DFAG"); + BOOST_CHECK_EQUAL(aln_cut.GetSequenceOffset(0), 0); // REF! + BOOST_CHECK_EQUAL(aln_cut.GetSequenceOffset(1), 1); + BOOST_CHECK_EQUAL(aln_cut.GetSequenceOffset(2), 2); + BOOST_CHECK_EQUAL(aln_cut.GetSequenceOffset(3), 1); +} + +BOOST_AUTO_TEST_CASE(test_clip_alignment_remove_all) { + // clip it at level 5 to remove all + AlignmentHandle aln_cut = GetClippingAln(); + int clip_start = alg::ClipAlignment(aln_cut, 5, true, true); + BOOST_CHECK_EQUAL(clip_start, -1); + BOOST_CHECK_EQUAL(aln_cut.GetCount(), 0); +} + +BOOST_AUTO_TEST_CASE(test_clip_alignment_generic) { + // generic tests + AlignmentHandle aln_cut = CreateAlignment(); + int clip_start = alg::ClipAlignment(aln_cut); + BOOST_CHECK_EQUAL(clip_start, -1); + BOOST_CHECK_EQUAL(aln_cut.GetCount(), 0); + aln_cut.AddSequence(CreateSequence("ref", "DDFAG")); + aln_cut.AddSequence(CreateSequence("A", "-----")); + clip_start = alg::ClipAlignment(aln_cut, 1, true, true); + BOOST_CHECK_EQUAL(clip_start, 0); + BOOST_CHECK_EQUAL(aln_cut.GetCount(), 1); + BOOST_CHECK_EQUAL(aln_cut.GetSequence(0).GetString(), "DDFAG"); + aln_cut.AddSequence(CreateSequence("A", "-----")); + clip_start = alg::ClipAlignment(aln_cut, 2, true, true); + BOOST_CHECK_EQUAL(clip_start, -1); + BOOST_CHECK_EQUAL(aln_cut.GetCount(), 0); +} + +BOOST_AUTO_TEST_CASE(test_distance_map) { + // setup + AlignmentHandle aln = GetDistanceMapAln(); + alg::DistanceMapPtr d_map = alg::CreateDistanceMap(aln); + geom::Vec3 pos2 = aln.GetResidue(2, 2).FindAtom("CA").GetPos(); + geom::Vec3 pos3 = aln.GetResidue(2, 3).FindAtom("CA").GetPos(); + geom::Vec3 pos4 = aln.GetResidue(2, 4).FindAtom("CA").GetPos(); + // check map size + BOOST_CHECK_EQUAL(d_map->GetSize(), 8u); + BOOST_CHECK_EQUAL(d_map->GetNumStructures(), 2u); + // check single entry + const alg::Distances& di = d_map->GetDistances(2,4); + BOOST_CHECK(di == d_map->GetDistances(4,2)); + BOOST_CHECK_EQUAL(di.GetDataSize(), 2u); + std::pair<Real,int> data0 = di.GetDataElement(0); + std::pair<Real,int> data1 = di.GetDataElement(1); + BOOST_CHECK_EQUAL(data0.second + data1.second, 1+2); + BOOST_CHECK_EQUAL(data0.first, geom::Length(pos4 - pos2)); + BOOST_CHECK_EQUAL(data1.first, data0.first); + // check modified entry + const alg::Distances& di2 = d_map->GetDistances(3,4); + BOOST_CHECK_EQUAL(di2.GetDataSize(), 2u); + data0 = di2.GetDataElement(0); + data1 = di2.GetDataElement(1); + if (data0.second == 2) std::swap(data0, data1); + BOOST_CHECK_EQUAL(data0.second, 1); + BOOST_CHECK_EQUAL(data1.second, 2); + BOOST_CHECK_EQUAL(data0.first, geom::Length(pos4)); + BOOST_CHECK_EQUAL(data1.first, geom::Length(pos4 - pos3)); + BOOST_CHECK(data0.first != data1.first); + // min/max and other features + std::pair<Real,int> min_data = di2.GetMin(); + std::pair<Real,int> max_data = di2.GetMax(); + if (data0.first > data1.first) { + BOOST_CHECK(max_data == data0); + BOOST_CHECK(min_data == data1); + } else { + BOOST_CHECK(max_data == data1); + BOOST_CHECK(min_data == data0); + } + const Real exp_avg = (data0.first + data1.first)/2; + BOOST_CHECK_CLOSE(di2.GetAverage(), exp_avg, 1e-3); + const Real exp_std = std::abs(data0.first - exp_avg); + BOOST_CHECK_CLOSE(di2.GetStdDev(), exp_std, 1e-3); + BOOST_CHECK_CLOSE(di2.GetWeightedStdDev(2), exp_std * std::exp(-exp_avg/4), + 1e-3); + BOOST_CHECK_CLOSE(di2.GetNormStdDev(), exp_std/exp_avg, 1e-3); + // check other entries + BOOST_CHECK_EQUAL(d_map->GetDistances(1,2).GetDataSize(), 1u); + BOOST_CHECK_EQUAL(d_map->GetDistances(1,7).GetDataSize(), 0u); + BOOST_CHECK_EQUAL(d_map->GetDistances(4,7).GetDataSize(), 0u); +} + +BOOST_AUTO_TEST_CASE(test_variance_map) { + alg::DistanceMapPtr d_map = GetDistanceMap(); + // check variance map + alg::VarianceMapPtr std_mat = alg::CreateVarianceMap(d_map); + BOOST_CHECK_EQUAL(std_mat->GetSize(), 8); + for (uint row = 0; row < 8; ++row) { + for (uint col = 0; col < row; ++col) { + if ((col == 3 && row <= 5) || (row == 3 && col >= 2)) { + BOOST_CHECK(std_mat->Get(row, col) > 0); + } else { + BOOST_CHECK_EQUAL(std_mat->Get(row, col), Real(0)); + } + BOOST_CHECK_EQUAL(std_mat->Get(row, col), std_mat->Get(col, row)); + } + BOOST_CHECK_EQUAL(std_mat->Get(row, row), Real(0)); + } + Real max_std = std::max(std_mat->Get(3,4), std_mat->Get(2,3)); + max_std = std::max(std_mat->Get(3,5), max_std); + BOOST_CHECK_EQUAL(std_mat->Min(), Real(0)); + BOOST_CHECK_EQUAL(std_mat->Max(), max_std); + // check json export + String json = std_mat->GetJsonString(); + BOOST_CHECK_EQUAL(std::count(json.begin(), json.end(), '['), 8+1); + BOOST_CHECK_EQUAL(std::count(json.begin(), json.end(), ']'), 8+1); + BOOST_CHECK_EQUAL(std::count(json.begin(), json.end(), ','), 7*8 + 7); +} + +BOOST_AUTO_TEST_CASE(test_dist_to_mean) { + alg::DistanceMapPtr d_map = GetDistanceMap(); + // check distance to mean + alg::Dist2MeanPtr dist_to_mean = alg::CreateDist2Mean(d_map); + uint num_res = dist_to_mean->GetNumResidues(); + uint num_str = dist_to_mean->GetNumStructures(); + BOOST_CHECK_EQUAL(num_res, 8u); + BOOST_CHECK_EQUAL(num_str, 2u); + for (uint row = 0; row < num_res; ++row) { + for (uint col = 0; col < num_str; ++col) { + if (row >= 2 && row <= 5) { + BOOST_CHECK(dist_to_mean->Get(row, col) != 0); + } else { + if (dist_to_mean->Get(row, col) != 0) std::cout << row << ", " << col; + BOOST_CHECK_EQUAL(dist_to_mean->Get(row, col), Real(0)); + } + } + } + // check json export + String json = dist_to_mean->GetJsonString(); + BOOST_CHECK_EQUAL(std::count(json.begin(), json.end(), '['), 8+1); + BOOST_CHECK_EQUAL(std::count(json.begin(), json.end(), ']'), 8+1); + BOOST_CHECK_EQUAL(std::count(json.begin(), json.end(), ','), 1*8 + 7); +} + +BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/seq/alg/tests/testfiles/dummy.pdb b/modules/seq/alg/tests/testfiles/dummy.pdb new file mode 100644 index 0000000000000000000000000000000000000000..30bd0291d76fcf1580cef43f24ffab295caa8ec0 --- /dev/null +++ b/modules/seq/alg/tests/testfiles/dummy.pdb @@ -0,0 +1,41 @@ +ATOM 1 N ASP A 1 0.000 0.000 0.000 1.00 0.00 N +ATOM 2 CA ASP A 1 1.430 0.000 0.000 1.00 0.00 C +ATOM 3 C ASP A 1 1.866 1.425 0.000 1.00 0.00 C +ATOM 4 O ASP A 1 2.762 1.828 0.740 1.00 0.00 O +ATOM 5 CB ASP A 1 1.954 -0.709 -1.214 1.00 0.00 C +ATOM 6 N ASP A 2 1.227 2.248 -0.851 1.00 0.00 N +ATOM 7 CA ASP A 2 1.585 3.631 -0.913 1.00 0.00 C +ATOM 8 C ASP A 2 1.354 4.202 0.444 1.00 0.00 C +ATOM 9 O ASP A 2 2.176 4.940 0.984 1.00 0.00 O +ATOM 10 CB ASP A 2 0.748 4.344 -1.932 1.00 0.00 C +ATOM 11 N PHE A 3 0.198 3.868 1.046 1.00 0.00 N +ATOM 12 CA PHE A 3 -0.101 4.374 2.350 1.00 0.00 C +ATOM 13 C PHE A 3 0.977 3.896 3.260 1.00 0.00 C +ATOM 14 O PHE A 3 1.514 4.643 4.077 1.00 0.00 O +ATOM 15 CB PHE A 3 -1.437 3.875 2.812 1.00 0.00 C +ATOM 16 N ALA A 4 1.333 2.604 3.143 1.00 0.00 N +ATOM 17 CA ALA A 4 2.360 2.066 3.980 1.00 0.00 C +ATOM 18 C ALA A 4 3.599 2.847 3.707 1.00 0.00 C +ATOM 19 O ALA A 4 4.320 3.256 4.616 1.00 0.00 O +ATOM 20 CB ALA A 4 2.578 0.614 3.676 1.00 0.00 C +ATOM 21 N GLY A 5 3.887 3.083 2.415 1.00 0.00 N +ATOM 22 CA GLY A 5 5.058 3.826 2.067 1.00 0.00 C +ATOM 23 C GLY A 5 4.925 5.169 2.697 1.00 0.00 C +ATOM 24 O GLY A 5 5.861 5.699 3.294 1.00 0.00 O +ATOM 25 N THR A 6 3.729 5.772 2.579 1.00 0.00 N +ATOM 26 CA THR A 6 3.518 7.063 3.156 1.00 0.00 C +ATOM 27 C THR A 6 3.755 6.930 4.621 1.00 0.00 C +ATOM 28 O THR A 6 4.423 7.752 5.246 1.00 0.00 O +ATOM 29 CB THR A 6 2.119 7.534 2.891 1.00 0.00 C +ATOM 30 N HIS A 7 3.200 5.863 5.224 1.00 0.00 N +ATOM 31 CA HIS A 7 3.379 5.662 6.628 1.00 0.00 C +ATOM 32 C HIS A 7 4.845 5.537 6.866 1.00 0.00 C +ATOM 33 O HIS A 7 5.402 6.125 7.792 1.00 0.00 O +ATOM 34 CB HIS A 7 2.668 4.419 7.075 1.00 0.00 C +ATOM 35 N ASN A 8 5.526 4.749 6.016 1.00 0.00 N +ATOM 36 CA ASN A 8 6.937 4.576 6.176 1.00 0.00 C +ATOM 37 C ASN A 8 7.558 5.925 6.054 1.00 0.00 C +ATOM 38 O ASN A 8 8.423 6.279 6.854 1.00 0.00 O +ATOM 39 CB ASN A 8 7.475 3.654 5.122 1.00 0.00 C +TER 40 ASN A 8 +END diff --git a/modules/seq/base/doc/seq.rst b/modules/seq/base/doc/seq.rst index 5022c0aba584d651d8e0d0d056e89cbc635468ff..4c3e2bf17517818a8de3fac10fe02f8088191429 100644 --- a/modules/seq/base/doc/seq.rst +++ b/modules/seq/base/doc/seq.rst @@ -231,7 +231,7 @@ the same length. New instances of alignments are created with Typically sequence alignments are used column-based, i.e by looking at an aligned columns in the sequence alignment. To get a row-based (sequence) view -on the sequence list, use :meth:`GetSequenceList()`. +on the sequence list, use :meth:`GetSequences()`. All functions that operate on an alignment will again produce a valid alignment. This mean that it is not possible to change the length of one sequence, without @@ -277,7 +277,7 @@ an alignment: Returns the sequence at the given index, raising an IndexError when trying to access an inexistent sequence. - .. method:: GetSequenceList() + .. method:: GetSequences() Returns a list of all sequence of the alignment. @@ -343,7 +343,7 @@ an alignment: .. method:: Cut(start, end) Removes the columns in the half-closed interval `start`, `end` from the - alignment. + alignment. Note that this function does not update offsets! .. code-block:: python @@ -365,16 +365,6 @@ an alignment: :param new_region: The region to be inserted :type new_region: :class:`AlignedRegion` or :class:`AlignmentHandle` - - .. method:: ShiftRegion(start, end, amount, master=-1) - - Shift columns in the half-closed interval `start`, `end`. If amount is a - positive number, the columns are shifted to the right, if negative, the - columns are shifted to the left. - - If master is set to -1, all sequences in the region are affected, otherwise - only the sequence at index equal to master is shifted. - .. method:: GetMatchingBackboneViews(index1=0, index2=1) Returns a tuple of entity views containing matching backbone atoms for the diff --git a/modules/seq/base/src/alignment_handle.hh b/modules/seq/base/src/alignment_handle.hh index 9990e170570d4c19f4c0f4a9eaf5129ea1b2f60f..6ac044cfdbd8ecf8dc3d33c16fbd3ce4b8c7ff27 100644 --- a/modules/seq/base/src/alignment_handle.hh +++ b/modules/seq/base/src/alignment_handle.hh @@ -44,7 +44,7 @@ class AlignedColumnIterator; /// /// Typically sequence alignments are used column-based, i.e by looking at an /// aligned columns in the sequence alignment. To get a row-based (sequence) -/// view on the sequence list, use AlignmentHandle::GetSequenceList(). For an +/// view on the sequence list, use AlignmentHandle::GetSequences(). For an /// overview of how to use the sequence module, see \ref module_seq "here" /// /// All operators that operate on an alignment will again produce a valid