diff --git a/modules/db/pymod/export_linear_db.cc b/modules/db/pymod/export_linear_db.cc index 314f49ae6d001bdc9c4d1258544d719ad8f0513c..8b9f7f942551a7bd5105dc0cdff59b607416e200 100644 --- a/modules/db/pymod/export_linear_db.cc +++ b/modules/db/pymod/export_linear_db.cc @@ -147,6 +147,27 @@ void WrapGetPositions(LinearPositionContainerPtr container, container->GetPositions(p_range, positions); } + +tuple WrapExtractTemplateData(const String& entry_name, const String& chain_name, + const ost::seq::AlignmentHandle& aln, + LinearIndexer& indexer, + LinearCharacterContainer& seqres_container, + LinearCharacterContainer& atomseq_container, + LinearPositionContainer& position_container) { + + std::vector<int> v_residue_numbers; + geom::Vec3List ca_positions; + + ost::db::ExtractTemplateData(entry_name, chain_name, aln, indexer, + seqres_container, atomseq_container, + position_container, v_residue_numbers, + ca_positions); + + list residue_numbers; + VecToList(v_residue_numbers, residue_numbers); + return boost::python::make_tuple(residue_numbers, ca_positions); +} + } @@ -204,5 +225,14 @@ void export_linear_db() { arg("atomseq_container"), arg("position_container"), arg("seq"), arg("positions"))); + + def("ExtractTemplateData", &WrapExtractTemplateData, (arg("entry_name"), + arg("chain_name"), + arg("aln"), + arg("linear_indexer"), + arg("seqres_container"), + arg("atomseq_container"), + arg("position_container"))); + } diff --git a/modules/db/src/extract_data_helper.cc b/modules/db/src/extract_data_helper.cc index fba11bbf0f9c81b20f66495e6b4af8bb840e39e1..607499e8fd009972a163c21f9327b4ceadac0e09 100644 --- a/modules/db/src/extract_data_helper.cc +++ b/modules/db/src/extract_data_helper.cc @@ -48,4 +48,66 @@ void ExtractValidPositions(const String& entry_name, const String& chain_name, valid_seq.end())); } +void ExtractTemplateData(const String& entry_name, const String& chain_name, + const ost::seq::AlignmentHandle& aln, + LinearIndexer& indexer, + LinearCharacterContainer& seqres_container, + LinearCharacterContainer& atomseq_container, + LinearPositionContainer& position_container, + std::vector<int>& residue_numbers, + geom::Vec3List& ca_positions) { + + std::pair<uint64_t, uint64_t> data_range = indexer.GetDataRange(entry_name, + chain_name); + + String template_seqres = aln.GetSequence(1).GetGaplessString(); + data_range.first += aln.GetSequence(1).GetOffset(); + data_range.second = data_range.first + template_seqres.size(); + + // check, whether the the template seqres is consistent with what + // we find in seqres_container + String expected_template_seqres; + seqres_container.GetCharacters(data_range, expected_template_seqres); + if(expected_template_seqres != template_seqres) { + throw std::runtime_error("Template sequence in input alignment is " + "inconsistent with sequence in SEQRES container!"); + } + + String template_atomseq; + atomseq_container.GetCharacters(data_range, template_atomseq); + geom::Vec3List extracted_positions; + position_container.GetPositions(data_range, extracted_positions); + + uint current_rnum = aln.GetSequence(0).GetOffset() + 1; + uint current_template_pos = 0; + String seqres_seq = aln.GetSequence(0).GetString(); + String template_seq = aln.GetSequence(1).GetString(); + + // prepare output + uint template_atomseq_size = template_atomseq.size(); + ca_positions.clear(); + residue_numbers.clear(); + ca_positions.reserve(template_atomseq_size); + residue_numbers.reserve(template_atomseq_size); + + for(int i = 0; i < aln.GetLength(); ++i) { + + if(seqres_seq[i] != '-' && template_seq[i] != '-') { + if(template_atomseq[current_template_pos] != '-') { + // it is aligned and we have a valid position! + residue_numbers.push_back(current_rnum); + ca_positions.push_back(extracted_positions[current_template_pos]); + } + } + + if(seqres_seq[i] != '-') { + ++current_rnum; + } + + if(template_seq[i] != '-') { + ++current_template_pos; + } + } +} + }} //ns diff --git a/modules/db/src/extract_data_helper.hh b/modules/db/src/extract_data_helper.hh index 844f56bf3c3b3bb721d5099550d134f66bd03775..288ed953688f50b7a806aab8416fa6896fe6bdc1 100644 --- a/modules/db/src/extract_data_helper.hh +++ b/modules/db/src/extract_data_helper.hh @@ -24,6 +24,7 @@ #include <ost/seq/sequence_handle.hh> #include <ost/db/linear_indexer.hh> #include <ost/db/binary_container.hh> +#include <ost/seq/alignment_handle.hh> namespace ost { namespace db { @@ -34,6 +35,15 @@ void ExtractValidPositions(const String& entry_name, const String& chain_name, ost::seq::SequenceHandle& seq, geom::Vec3List& positions); +void ExtractTemplateData(const String& entry_name, const String& chain_name, + const ost::seq::AlignmentHandle& aln, + LinearIndexer& indexer, + LinearCharacterContainer& seqres_container, + LinearCharacterContainer& atomseq_container, + LinearPositionContainer& position_container, + std::vector<int>& residue_numbers, + geom::Vec3List& ca_positions); + }} //ns #endif \ No newline at end of file