Commit 62d38255 authored by Tauriello Gerardo's avatar Tauriello Gerardo
Browse files

Merge branch 'release-1.2.0'

parents 168148a0 8855768a
......@@ -5,7 +5,36 @@
Changelog
================================================================================
Release 1.1
Release 1.2.0
--------------------------------------------------------------------------------
* Graph optimizer has been separated from the sidechain module and can now be
used for arbitrary optimization problems. Available optimization algorithms
are TreePack, AStar and MonteCarlo.
* Make it possible to distinguish between the scores of a loop towards a constant
environment (external scores) and the scores within the loop itself
(internal scores).
* Most scores based on pairwise interactions are now pairwise decomposable.
* Disconnected loops at several locations can be scored at once.
* Avoid the usage of the DSSP executable and use the OpenStructure internal
secondary structure assignment instead.
* Allow to decide whether the CB atom belongs to the actual rotamer or to the
constant frame in sidechain optimization. This can be useful in design
questions, where the identity of a rotamer is not given and you want to
allow glycine or alanine.
* The naming of the entries in the StructureDB is not strictly limited to
4 letter codes anymore, arbitrary strings can be used instead.
* Adding coordinates to the StructureDB does not require external tools anymore
(msms, dssp etc.), internal implementations are used instead.
* The data that is stored in a StructureDB can be controlled at initialization,
everything except the sequence and the actual coordinates is optional.
* Entries in the StructureDB can be removed again.
* Allow to search positions of arbitrary copies in DynamicSpatialOrganizer
by providing RT operators.
* Several minor bug fixes, improvements, and speed-ups
Release 1.1.0
--------------------------------------------------------------------------------
* Updated dependencies: need Eigen 3.3.0 and OST 1.7
......
......@@ -16,7 +16,7 @@ include(PROMOD3)
# versioning info
set(PROMOD3_VERSION_MAJOR 1)
set(PROMOD3_VERSION_MINOR 1)
set(PROMOD3_VERSION_MINOR 2)
set(PROMOD3_VERSION_PATCH 0)
set(PROMOD3_VERSION_STRING ${PROMOD3_VERSION_MAJOR}.${PROMOD3_VERSION_MINOR})
set(PROMOD3_VERSION_STRING ${PROMOD3_VERSION_STRING}.${PROMOD3_VERSION_PATCH})
......@@ -84,9 +84,9 @@ if(NOT DISABLE_DOCUMENTATION)
find_package(Sphinx ${PYTHON_VERSION} REQUIRED)
set(PYTHON_DOC_URL "https://docs.python.org/${PYTHON_VERSION}")
# set this to the URL corresponding to the version of OST you are using
set(OST_DOC_URL "http://www.openstructure.org/docs/dev")
set(OST_DOC_URL "https://www.openstructure.org/docs/dev")
endif()
find_package(OPENSTRUCTURE 1.7 REQUIRED
find_package(OPENSTRUCTURE 1.8 REQUIRED
COMPONENTS io mol seq seq_alg mol_alg conop img mol_mm)
if(CMAKE_COMPILER_IS_GNUCXX)
......@@ -126,8 +126,8 @@ set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES
add_subdirectory(config)
add_subdirectory(core)
add_subdirectory(loop)
add_subdirectory(sidechain)
add_subdirectory(scoring)
add_subdirectory(sidechain)
add_subdirectory(modelling)
add_subdirectory(scripts)
add_subdirectory(actions)
......
......@@ -13,7 +13,6 @@ Please see the ProMod3 documentation for details on more advanced usage.
import os
import ost
from ost import io, settings
from ost.bindings import dssp
from promod3 import modelling
from promod3.core import pm3argparse, helper
......@@ -38,19 +37,12 @@ if len(opts.alignments) == 1:
elif len(opts.alignments) > 1:
ost.LogInfo("Build " + str(len(opts.alignments)) + " models with the "+
"following alignments:")
dssp_usable = True
for aln in opts.alignments:
ost.LogInfo(aln.ToString(80))
if dssp_usable:
for i in range(1, aln.GetCount()):
try:
dssp.AssignDSSP(aln.GetSequence(i).GetAttachedView())
except settings.FileNotFound:
dssp_usable = False
break
if not dssp_usable:
ost.LogInfo("dssp binary is missing. You can get better modelling results "+
"if you install dssp.")
for i in range(1, aln.GetCount()):
ost.mol.alg.AssignSecStruct(aln.GetSequence(i).GetAttachedView())
# model it
try:
......
......@@ -5,6 +5,7 @@ helper.rst
geometry.rst
runtime_profiling.rst
setcompoundschemlib.rst
graph_minimizer.rst
)
add_doc_source(NAME core RST ${CORE_RST})
Graph Minimizer
================================================================================
.. currentmodule:: promod3.core
The graph minimizer solves an energy minimization problem where we have n
nodes :math:`N_i`, with each node having several possible solutions.
Every solution has a self energy :math:`E_{self}` and pairwise energies in between nodes
are possible. The goal is to select exactly one solution per node to obtain
a set :math:`X=[x_1, x_2, ..., x_n]` that minimizes:
.. math::
F(X)=\displaystyle\sum_iE_{self}(N_i[x_i]) +\displaystyle \sum_i \displaystyle \sum_{j>i}E_{pair}(N_i[x_i], N_j[x_j])
.. class:: GraphMinimizer
.. method:: AddNode(self_energies)
Adds a node to the graph.
:param self_energies: Directly controls the number of possible solutions in that
node and assigns the corresponding self energies
:type self_energies: :class:`list` of :class:`float`
:returns: The idx of the added node
:rtype: :class:`int`
.. method:: AddEdge(node_idx_one, node_idx_two, pairwise_energies)
Adds an edge between the specified nodes.
:param node_idx_one: Index of the first node
:param node_idx_two: Index of the second node
:param pairwise_energies: The pairwise energies that contribute to the
overall energy function. There must be a list for
every possible solution in node one. All of those
lists must have exactly the length of possible
solutions in node two.
:type node_idx_one: :class:`int`
:type node_idx_two: :class:`int`
:type pairwise_energies: :class:`list` of :class:`list` of :class:`float`
:returns: The idx of the added edge
:rtype: :class:`int`
:raises: :exc:`~exceptions.RuntimeError` if *node_idx_one* or *node_idx_two*
specify non existent nodes or when *pairwise_energies* is
inconsistent with the number of solutions in the specified nodes.
.. method:: ApplyDEE(node_idx, [e_cut=0.0])
Applies dead end elimination on one particular node and potentially
deactivates certain solutions. The goldstein criterion is described in
[goldstein1994]_.
:param node_idx: Node to apply dead end elimination
:param e_cut: If set to
0.0, the default goldstein criterion is applied =>
a solution is removed if it's dominated by all other
solutions in the same node. If you increase this value,
a solution must be dominated by at least this **e_cut**.
:type node_idx: :class:`int`
:type e_cut: :class:`float`
:returns: :class:`bool` whether any solution has been deactivated.
.. method:: ApplyEdgeDecomposition(edge_idx, epsilon)
Applies edge decomposition on one particular edge and potentially
deactivates it. The exact decomposition procedure is described in
[krivov2009]_.
:param edge_idx: Edge to decompose.
:param epsilon: The energy threshold to perform edge decomposition.
:type edge_idx: :class:`int`
:type epsilon: :class:`float`
:returns: :class:`bool` whether the edge has been decomposed and
deactivated
.. method:: Prune(epsilon, [e_cut=0.0, consider_all_nodes=False])
Performs edge decomposition followed by dead end elimination in an
iterative manner until no changes can be observed anymore given
**epsilon**.
:param epsilon: The energy threshold to perform edge decomposition.
:param e_cut: Parameter to control dead end elimination.
:param consider_all_nodes: Flag, wether the dead end elimination should be
applied to all nodes, or only those who are
connected with an edge removed by edge
decomposition. This is useful if already a Prune
operation has been performed => only those nodes
connected to a removed edge have the chance for
successful dead end elimination.
:type epsilon: :class:`float`
:type e_cut: :class:`float`
:type consider_all_nodes: :class:`bool`
.. method:: Reset()
Resets the graph by undoing any pruning operation (DEE and edge decomposition)
.. method:: TreeSolve([max_complexity=inf, initial_epsilon=0.02])
The method solves a graph using a minimal width tree decomposition
approach in an iterative manner. In every iteration, the algorithm performs
a pruning step (DEE / Edge Decomposition) and subsequently tries to solve
each connected component in the graph separately.
If the number of possible enumerations in the tree constructetd from a
particular connected component is is larger **max_complexity**,
this component is solved in a later iteration. The algorithm iterates until
all connected components are solved and steadily increases the epsilon value
resulting in a more and more agressive edge decomposition.
Algorithm further descsribed in [krivov2009]_.
:param max_complexity: Max number of possible permutations, that have to
be enumerated in the resulting tree after tree
decomposition.
:param initial_epsilon: The initial energy threshold to perform edge
decomposition.
:type max_complexity: :class:`int`
:type initial_epsilon: :class:`float`
:returns: A tuple with the first element being a list of indices
representing the single solutions minimizing
the overall energy function.
The second element is the according energy value.
.. method:: AStarSolve(e_thresh, [max_n=100, max_visited_nodes=100000000])
The method solves a graph using the A\* algorithm. Besides creating the
minimal energy solution, the function produces a maximum of **max_n**
solutions sorted by energy. It aborts as soon as it sees the first solution
with an energy difference of **e_thresh** to the optimal solution or hits
**max_n**. If you're only interested in the optimal solution you should use
the TreeSolve function since it's much faster and uses less memory.
There is no automatic pruning of the graph using DEE or edge decomposition,
so you have to do it by yourself, otherwise you'll have a looooooong
runtime or even hit the **max_visited_nodes** parameter that caps the memory
usage.
To have a valid solution you have to take care that you set the **e_cut**
parameter in the pruning function to **e_tresh**.
Algorithm is described in [leach1998]_.
:param e_thresh: Maximal energy difference of a solution to the
optimal solution to be considered.
:param max_n: The maximum number of solutions that will be generated.
:param max_visited_nodes: Caps the memory usage of the algorithm. Besides
The memory used for pairwise energies and self
energies, the algorithm uses about
**max_visited_nodes** * 20 bytes.
:type e_thresh: :class:`float`
:type max_n: :class:`int`
:type max_visited_nodes: :class:`int`
:returns: A tuple with the first element being a list of
solutions. The second element is a list
of energies for the according solutions.
.. method:: MCSolve([n=100, mc_steps=100000, start_temperature=1000.0, \\
change_frequency=1000, cooling_factor=0.9, seed=0])
Does a total of **n** Monte Carlo runs to find low energy solutions
of the graph. Each run starts with a random
configuration. At each of the **mc_steps** steps, a random node and
a random solution at that location is selected and an energy difference
of that random selection relative to the current configuration is
estimated. If the difference in energy is negative, the step is
accepted. If not, the step is accepted with a probability given by
the temperature dependent Metropolis criterion
:math:`exp^{\left(\frac{-e_{diff}}{T}\right)}`.
The temperature for every run starts with **start_temperature**
and is multiplied every **change_frequency** steps with **cooling_factor**
to achieve a simulated annealing effect.
There is no automatic pruning of the graph using DEE or edge decomposition,
you have to do that by yourself.
:param n: Number of Monte Carlo runs
:param mc_steps: Number of Monte Carlo steps per run
:param start_temperature: Start temperature for the temperature dependent
Metropolis criterion
:param change_frequency: Number of steps the temperature stays the same
:param cooling_factor: Factor to multiply temperature each
**change_frequency** steps
:param seed: Seed for random number generator
:type n: :class:`int`
:type mc_steps: :class:`int`
:type start_temperature: :class:`float`
:type change_frequency: :class:`int`
:type cooling_factor: :class:`float`
:type seed: :class:`float`
:returns: A tuple with the first element being a list of
solutions. The second element is a list
of energies for the according solutions.
.. method:: NaiveSolve()
Don't even think of using this... This function only exists for debug
purposes and does the full enumeration of the solution space.
It might become faster with the appropriate
`techniques <https://www.youtube.com/watch?v=fQGbXmkSArs>`_.
:returns: A tuple with the first element being a list of indices
representing the single solutions minimizing
the overall energy function.
The second element is the according energy value.
.. [goldstein1994] Goldstein RF (1994). Efficient rotamer elimination applied to protein side-chains and related spin glasses. Biophys J.
.. [leach1998] Leach AR, Lemon AP (1998). Explring the conformational space of prootein side chains using dead-end elimination and the A* algorithm. Proteins.
......@@ -16,3 +16,4 @@ Contents:
helper
geometry
runtime_profiling
graph_minimizer
......@@ -2,6 +2,7 @@
set(CORE_CPP
export_geom.cc
export_runtime_profiling.cc
export_graph_minimizer.cc
wrap_core.cc
)
......
#include <boost/python.hpp>
#include <boost/python/register_ptr_to_python.hpp>
#include <promod3/sidechain/rotamer_graph.hh>
#include <promod3/core/graph_minimizer.hh>
#include <promod3/core/eigen_types.hh>
#include <promod3/core/export_helper.hh>
#include <promod3/core/message.hh>
using namespace boost::python;
using namespace promod3::sidechain;
using namespace promod3;
namespace {
RotamerGraphPtr WrapRRMList(boost::python::list& rotamer_groups) {
std::vector<RRMRotamerGroupPtr> v_rotamer_groups;
core::ConvertListToVector(rotamer_groups, v_rotamer_groups);
return RotamerGraph::CreateFromRRMList(v_rotamer_groups);
int WrapAddNode(promod3::core::GraphMinimizerPtr graph,
const boost::python::list& self_energies) {
std::vector<Real> v_self_energies;
promod3::core::ConvertListToVector(self_energies, v_self_energies);
return graph->AddNode(v_self_energies);
}
RotamerGraphPtr WrapFRMList(boost::python::list& rotamer_groups) {
std::vector<FRMRotamerGroupPtr> v_rotamer_groups;
core::ConvertListToVector(rotamer_groups, v_rotamer_groups);
return RotamerGraph::CreateFromFRMList(v_rotamer_groups);
int WrapAddEdge(promod3::core::GraphMinimizerPtr graph,
int node_idx_one, int node_idx_two,
const boost::python::list& pairwise_energies) {
int num_rows = boost::python::len(pairwise_energies);
if(num_rows == 0) {
throw promod3::Error("Pairwise energies must not be empty!");
}
int num_cols = len(extract<list>(pairwise_energies[0]));
promod3::core::EMatXX emat = promod3::core::EMatXX::Zero(num_rows, num_cols);
for(int i = 0; i < num_rows; ++i) {
list current_list = extract<list>(pairwise_energies[i]);
if(len(current_list) != num_cols) {
throw promod3::Error("All lists must have exactly the same length!");
}
for(int j = 0; j < num_cols; ++j) {
emat(i,j) = extract<Real>(current_list[j]);
}
}
return graph->AddEdge(node_idx_one, node_idx_two, emat);
}
boost::python::tuple WrapTreeSolve(RotamerGraphPtr graph, uint64_t max_complexity,
boost::python::tuple WrapTreeSolve(promod3::core::GraphMinimizerPtr graph,
uint64_t max_complexity,
Real initial_epsilon) {
std::pair<std::vector<int>, Real>
full_solution = graph->TreeSolve(max_complexity,initial_epsilon);
full_solution = graph->TreeSolve(max_complexity, initial_epsilon);
std::vector<int> solution = full_solution.first;
Real energy = full_solution.second;
boost::python::list return_list;
core::AppendVectorToList(solution, return_list);
promod3::core::AppendVectorToList(solution, return_list);
return boost::python::make_tuple(return_list, energy);
}
boost::python::tuple WrapAStarSolve(RotamerGraphPtr graph,
boost::python::tuple WrapAStarSolve(promod3::core::GraphMinimizerPtr graph,
Real e_thresh, uint max_n,
uint max_visited_nodes) {
std::pair<std::vector<std::vector<int> >, std::vector<Real> >
full_solution = graph->AStarSolve(e_thresh, max_n, max_visited_nodes);
......@@ -46,23 +76,23 @@ boost::python::tuple WrapAStarSolve(RotamerGraphPtr graph,
for(uint i = 0; i < solutions.size(); ++i){
boost::python::list actual_solution;
core::AppendVectorToList(solutions[i], actual_solution);
promod3::core::AppendVectorToList(solutions[i], actual_solution);
solution_list.append(actual_solution);
}
core::AppendVectorToList(energies, energy_list);
promod3::core::AppendVectorToList(energies, energy_list);
return boost::python::make_tuple(solution_list, energy_list);
}
boost::python::tuple WrapMCSolve(RotamerGraphPtr graph, int n,
boost::python::tuple WrapMCSolve(promod3::core::GraphMinimizerPtr graph, int n,
int mc_steps, Real start_temperature,
int change_frequency, Real cooling_factor,
int seed) {
std::pair<std::vector<std::vector<int> >, std::vector<Real> >
full_solution = graph->MCSolve(n, mc_steps, start_temperature,
change_frequency, cooling_factor,
seed);
change_frequency, cooling_factor, seed);
std::vector<std::vector<int> > solutions = full_solution.first;
std::vector<Real> energies = full_solution.second;
......@@ -72,99 +102,57 @@ boost::python::tuple WrapMCSolve(RotamerGraphPtr graph, int n,
for(uint i = 0; i < solutions.size(); ++i){
boost::python::list actual_solution;
core::AppendVectorToList(solutions[i], actual_solution);
promod3::core::AppendVectorToList(solutions[i], actual_solution);
solution_list.append(actual_solution);
}
core::AppendVectorToList(energies, energy_list);
promod3::core::AppendVectorToList(energies, energy_list);
return boost::python::make_tuple(solution_list, energy_list);
}
boost::python::list WrapGetActiveRotamers(RotamerGraphPtr graph,
int idx) {
std::vector<int> active_rotamers;
graph->GetActiveRotamers(idx, active_rotamers);
boost::python::list return_list;
core::AppendVectorToList(active_rotamers, return_list);
return return_list;
}
boost::python::list WrapGetEdges(RotamerGraphPtr graph,
int idx) {
std::vector<std::pair<int,int> > edges;
graph->GetEdges(edges);
boost::python::list return_list;
core::AppendVectorToList(edges, return_list);
return return_list;
}
boost::python::list WrapGetActiveEdges(RotamerGraphPtr graph,
int idx) {
std::vector<std::pair<int,int> > edges;
graph->GetActiveEdges(edges);
boost::python::list return_list;
core::AppendVectorToList(edges, return_list);
return return_list;
}
boost::python::tuple WrapNaiveSolve(promod3::core::GraphMinimizerPtr graph) {
boost::python::list WrapGetSelfEnergies(RotamerGraphPtr graph,
int idx) {
std::vector<Real> self_energies;
graph->GetSelfEnergies(idx, self_energies);
boost::python::list return_list;
core::AppendVectorToList(self_energies, return_list);
return return_list;
}
boost::python::list WrapGetPairwiseEnergies(RotamerGraphPtr graph,
int idx) {
std::vector<std::vector<Real> > pairwise_energies;
graph->GetPairwiseEnergies(idx, pairwise_energies);
std::pair<std::vector<int>, Real>
full_solution = graph->NaiveSolve();
std::vector<int> solution = full_solution.first;
Real energy = full_solution.second;
boost::python::list return_list;
for(uint i = 0; i < pairwise_energies.size(); ++i) {
boost::python::list row;
core::AppendVectorToList(pairwise_energies[i], row);
return_list.append(row);
}
return return_list;
promod3::core::AppendVectorToList(solution, return_list);
return boost::python::make_tuple(return_list, energy);
}
}
void export_Graph()
void export_graph_minimizer()
{
class_<RotamerGraph, boost::noncopyable>("RotamerGraph", no_init)
.def("CreateFromRRMList", &WrapRRMList, (arg("rotamer_groups")))
.staticmethod("CreateFromRRMList")
.def("CreateFromFRMList", &WrapFRMList, (arg("rotamer_groups")))
.staticmethod("CreateFromFRMList")
.def("Prune", &RotamerGraph::Prune,
(arg("epsilon"), arg("e_cut")=0.0, arg("consider_all_nodes")=false))
.def("Reset", &RotamerGraph::Reset)
.def("GetNumNodes", &RotamerGraph::GetNumNodes)
.def("GetNumEdges", &RotamerGraph::GetNumEdges)
.def("GetNumActiveNodes", &RotamerGraph::GetNumActiveNodes)
.def("GetNumActiveEdges", &RotamerGraph::GetNumActiveEdges)
.def("GetActiveRotamers", &WrapGetActiveRotamers, (arg("node_idx")))
.def("GetEdges", &WrapGetEdges)
.def("GetActiveEdges", &WrapGetActiveEdges)
.def("GetSelfEnergies", &WrapGetSelfEnergies, (arg("node_idx")))
.def("GetPairwiseEnergies", &WrapGetPairwiseEnergies,(arg("edge_idx")))
.def("ApplyDEE", &RotamerGraph::ApplyDEE,(arg("node_idx"), arg("e_thresh")=0.0))
.def("ApplyEdgeDecomposition", &RotamerGraph::ApplyEdgeDecomposition,(arg("edge_idx"), arg("e_thresh")=0.02))
class_<promod3::core::GraphMinimizer,
boost::noncopyable>("GraphMinimizer", init<>())
.def("AddNode", &WrapAddNode, (arg("self_energies")))
.def("AddEdge", &WrapAddEdge, (arg("node_idx_one"), arg("node_idx_two"),
arg("pairwise_energies")))
.def("ApplyDEE", &promod3::core::GraphMinimizer::ApplyDEE,
(arg("node_idx"), arg("e_thresh")=0.0))
.def("ApplyEdgeDecomposition",
&promod3::core::GraphMinimizer::ApplyEdgeDecomposition,
(arg("edge_idx"), arg("e_thresh")=0.02))
.def("Prune", &promod3::core::GraphMinimizer::Prune,
(arg("epsilon")=0.0, arg("e_cut")=0.0, arg("consider_all_nodes")=false))
.def("Reset", &promod3::core::GraphMinimizer::Reset)
.def("TreeSolve", &WrapTreeSolve,
(arg("max_complexity")=std::numeric_limits<uint64_t>::max(),
arg("initial_epsilon")=0.02))
.def("AStarSolve", &WrapAStarSolve,
(arg("e_thresh"), arg("max_n")=100, arg("max_visited_nodes")=100000000))
(arg("e_thresh")=1.0, arg("max_n")=100,
arg("max_visited_nodes")=100000000))
.def("MCSolve", &WrapMCSolve,
(arg("n")=100, arg("mc_steps")=100000, arg("start_temperature")=1000.0,
arg("change_frequency")=1000, arg("cooling_factor")=0.9, arg("seed")=0))
.def("NaiveSolve", &WrapNaiveSolve)
;
register_ptr_to_python<RotamerGraphPtr>();
register_ptr_to_python<promod3::core::GraphMinimizerPtr>();
}
......@@ -2,9 +2,11 @@
void export_geom();
void export_runtime_profiling();
void export_graph_minimizer();
BOOST_PYTHON_MODULE(_core)
{
export_geom();
export_runtime_profiling();
export_graph_minimizer();
}
......@@ -15,6 +15,8 @@ set(CORE_HEADERS
graph.hh
tree.hh
tetrahedral_polytope.hh
graph_minimizer.hh
</