diff --git a/modules/seq/alg/doc/seqalg.rst b/modules/seq/alg/doc/seqalg.rst index dba9045bdf3182df2005cd8c26bf17fd7b649e67..02f74c4f78015926f5faae38c54d5a4c72f24218 100644 --- a/modules/seq/alg/doc/seqalg.rst +++ b/modules/seq/alg/doc/seqalg.rst @@ -41,7 +41,26 @@ .. autofunction:: AlignmentFromChainView +.. function:: Conservation(aln, assign=true, prop_name="cons", ignore_gap=false) + Calculates conservation scores for each column in the alignment, according to + the ConSurf method (Armon et al., J. Mol. Biol. (2001) 307, 447-463). + + The conservation score is a value between 0 and 1. The bigger the number + the more conserved the aligned residues are. + + :param aln: An alignment handle + :type aln: :class:`~ost.seq.AlignmentHandle` + :param assign: If true, the conservation scores are assigned to attached + residues. The name of the property can be changed with the prop_name + parameter. Useful when coloring entities based on sequence conservation. + :param prop_name: The property name for assigning the conservation to + attached residues. Defaults to 'cons'. + :param ignore_gap: If true, the dissimilarity between two gaps is increased to + 6.0 instead of 0.5 as defined in the original version. Without this, a + stretch where in the alignment there is only one sequence which is + aligned to only gaps, is considered highly conserved (depending on the + number of gap sequences). .. function:: LocalAlign(seq1, seq2, subst_weight, gap_open=-5, gap_ext=-2) diff --git a/modules/seq/alg/pymod/wrap_seq_alg.cc b/modules/seq/alg/pymod/wrap_seq_alg.cc index 5c1fba67787786393921b1c5710be59ba6079381..9239d1ec3d2e759729cfb1bcde0fb9005e52ded1 100644 --- a/modules/seq/alg/pymod/wrap_seq_alg.cc +++ b/modules/seq/alg/pymod/wrap_seq_alg.cc @@ -53,7 +53,7 @@ BOOST_PYTHON_MODULE(_ost_seq_alg) .def("SetWeight", &SubstWeightMatrix::SetWeight) ; def("MergePairwiseAlignments", &MergePairwiseAlignments); - def("Conservation", &Conservation, (arg("assign")=true, arg("prop_name")="cons")); + def("Conservation", &Conservation, (arg("assign")=true, arg("prop_name")="cons", arg("ignore_gap")=false)); def("LocalAlign", &LocalAlign, (arg("seq1"), arg("seq2"),arg("subst_weight"), arg("gap_open")=-5, arg("gap_ext")=-2)); def("GlobalAlign", &GlobalAlign,(arg("seq1"),arg("seq2"),arg("subst_weight"), diff --git a/modules/seq/alg/src/conservation.cc b/modules/seq/alg/src/conservation.cc index 2b5bf3481c22a5801176b1aa1f8ff227df1f7dc4..e80077efd254deead27d156cd556d0da2f90721f 100644 --- a/modules/seq/alg/src/conservation.cc +++ b/modules/seq/alg/src/conservation.cc @@ -75,7 +75,7 @@ static float CHEM_DISSIM[][24]={ -1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00} }; -float PhysicoChemicalDissim(char c1, char c2) +float PhysicoChemicalDissim(char c1, char c2, bool ignore_gap) { static int indices[]={2, 23, 0, 9, 7, 17, 3, 10, 15, -1, 11, 14, 16, 8, -1, 1, 6, 12, 4, 5, @@ -94,12 +94,15 @@ float PhysicoChemicalDissim(char c1, char c2) s=CHEM_DISSIM[idx_b][idx_a-idx_b]; else s=CHEM_DISSIM[idx_a][idx_b-idx_a]; + if (ignore_gap && idx_a==20 && idx_b==20) { + s=6.0; + } assert(s>=0.0); return s; } std::vector<Real> Conservation(const AlignmentHandle& aln, bool assign, - const String& prop) + const String& prop, bool ignore_gap) { std::vector<Real> cons(aln.GetLength(), 0.0); int comb=(aln.GetCount()*(aln.GetCount()-1))/2; @@ -108,7 +111,7 @@ std::vector<Real> Conservation(const AlignmentHandle& aln, bool assign, AlignedColumn c=aln[col]; for (int i=0; i<aln.GetCount(); ++i) { for (int j=i+1; j<aln.GetCount(); ++j) { - score+=PhysicoChemicalDissim(c[i], c[j]); + score+=PhysicoChemicalDissim(c[i], c[j], ignore_gap); } } score=1.0-score/(6.0*comb); diff --git a/modules/seq/alg/src/conservation.hh b/modules/seq/alg/src/conservation.hh index 8e4146ee790f68056ad55e1b6efa62466c5e3135..651b5bdd150348ef812bf839e37e458e2fc2eb86 100644 --- a/modules/seq/alg/src/conservation.hh +++ b/modules/seq/alg/src/conservation.hh @@ -35,9 +35,15 @@ namespace ost { namespace seq { namespace alg { /// parameter. Useful when coloring entities based on sequence conservation. /// \p prop_name The property name for assigning the conservation to /// attached residues. Defaults to 'cons'. +/// \p ignore_gap If true, the dissimilarity between two gaps is increased to +/// 6.0 instead of 0.5 as defined in the original version. Without this, a +/// stretch where in the alignment there is only one sequence which is +/// aligned to only gaps, is considered highly conserved (depending on the +/// number of gap sequences). /// std::vector<Real> DLLEXPORT_OST_SEQ_ALG Conservation(const AlignmentHandle& aln, bool assign=true, - const String& prop_name="cons"); + const String& prop_name="cons", + bool ignore_gap=false); }}}