diff --git a/modules/db/doc/db.rst b/modules/db/doc/db.rst index a296c1ec6e5db939085a79dab3563841a38c5b39..87dc0919e840133815ea56af101ca8d9096b2d11 100644 --- a/modules/db/doc/db.rst +++ b/modules/db/doc/db.rst @@ -309,14 +309,14 @@ Data Extraction -------------------------------------------------------------------------------- Openstructure provides data extraction functionality for the following scenario: -There are three binary container. A position container to hold CA-positions -(:class:`LinearPositionContainer`), a SEQRES container and -an ATOMSEQ container (both: :class:`LinearCharacterContainer`). -They contain entries from the protein structure database +There is a SEQRES container and an ATOMSEQ container +(both: :class:`LinearCharacterContainer`). +Positions are stored in :class:`LinearPositionContainer`. +The containers contain entries from the protein structure database and sequence/position data is relative to the SEQRES of those entries. This means, if the SEQRES has more characters as there are resolved residues -in the structure, the entry in the position container still contains the exact -number of SEQRES characters but some position remain invalid. Thats where the +in the structure, the entry in the position container(s) still contains the exact +number of SEQRES characters but some positions remain invalid. That's where the ATOMSEQ container comes in. It only contains matching residues to the SEQRES but marks non-resolved residues with '-'. @@ -378,13 +378,16 @@ marks non-resolved residues with '-'. 
:type indexer: :class:`LinearIndexer` :type seqres_container: :class:`LinearCharacterContainer` :type atomseq_container: :class:`LinearCharacterContainer` - :type position_container: :class:`LinearPositionContainer` + :type position_container: :class:`LinearPositionContainer` or + :class:`list` of :class:`LinearPositionContainer` :returns: First element: :class:`list` of residue numbers that relate each entry in the second element to the target sequence specified in *aln*. The numbering scheme - starts from one. Second Element: :class:`geom.Vec3List` - with the according positions. + starts from one. Second Element: the according positions. + :class:`ost.geom.Vec3List` if *position_container* was a + :class:`LinearPositionContainer`, a list of + :class:`ost.geom.Vec3List` if it was a :class:`list`. :rtype: :class:`tuple` :raises: :exc:`ost.Error` if requested data is not present in diff --git a/modules/db/pymod/export_linear_db.cc b/modules/db/pymod/export_linear_db.cc index 597f4243e1f447d47d39f771b12a09a56e6c5e57..1cb623cb5b8644d6af0dbd939ca84fc8d44a31c0 100644 --- a/modules/db/pymod/export_linear_db.cc +++ b/modules/db/pymod/export_linear_db.cc @@ -148,24 +148,50 @@ void WrapGetPositions(LinearPositionContainerPtr container, } -tuple WrapExtractTemplateData(const String& entry_name, const String& chain_name, - const ost::seq::AlignmentHandle& aln, - LinearIndexer& indexer, - LinearCharacterContainer& seqres_container, - LinearCharacterContainer& atomseq_container, - LinearPositionContainer& position_container) { +tuple WrapExtractTemplateDataSingle(const String& entry_name, const String& chain_name, + const ost::seq::AlignmentHandle& aln, + LinearIndexerPtr indexer, + LinearCharacterContainerPtr seqres_container, + LinearCharacterContainerPtr atomseq_container, + LinearPositionContainerPtr position_container) { + + std::vector<LinearPositionContainerPtr> position_containers; + position_containers.push_back(position_container); + std::vector<int> 
v_residue_numbers; + std::vector<geom::Vec3List> position_vec; + ost::db::ExtractTemplateData(entry_name, chain_name, aln, indexer, + seqres_container, atomseq_container, + position_containers, v_residue_numbers, + position_vec); + + list residue_numbers; + VecToList(v_residue_numbers, residue_numbers); + return boost::python::make_tuple(residue_numbers, position_vec[0]); +} + +tuple WrapExtractTemplateDataList(const String& entry_name, const String& chain_name, + const ost::seq::AlignmentHandle& aln, + LinearIndexerPtr indexer, + LinearCharacterContainerPtr seqres_container, + LinearCharacterContainerPtr atomseq_container, + boost::python::list& position_containers) { + + std::vector<LinearPositionContainerPtr> v_position_containers; + ListToVec(position_containers, v_position_containers); std::vector<int> v_residue_numbers; - geom::Vec3List ca_positions; + std::vector<geom::Vec3List> position_vec; ost::db::ExtractTemplateData(entry_name, chain_name, aln, indexer, seqres_container, atomseq_container, - position_container, v_residue_numbers, - ca_positions); + v_position_containers, v_residue_numbers, + position_vec); list residue_numbers; + list position_list; VecToList(v_residue_numbers, residue_numbers); - return boost::python::make_tuple(residue_numbers, ca_positions); + VecToList(position_vec, position_list); + return boost::python::make_tuple(residue_numbers, position_list); } } @@ -226,13 +252,20 @@ void export_linear_db() { arg("position_container"), arg("seq"), arg("positions"))); - def("ExtractTemplateData", &WrapExtractTemplateData, (arg("entry_name"), - arg("chain_name"), - arg("aln"), - arg("linear_indexer"), - arg("seqres_container"), - arg("atomseq_container"), - arg("position_container"))); - + def("ExtractTemplateData", &WrapExtractTemplateDataSingle, (arg("entry_name"), + arg("chain_name"), + arg("aln"), + arg("linear_indexer"), + arg("seqres_container"), + arg("atomseq_container"), + arg("position_container"))); + + def("ExtractTemplateData", 
&WrapExtractTemplateDataList, (arg("entry_name"), + arg("chain_name"), + arg("aln"), + arg("linear_indexer"), + arg("seqres_container"), + arg("atomseq_container"), + arg("position_containers"))); } diff --git a/modules/db/src/extract_data_helper.cc b/modules/db/src/extract_data_helper.cc index f95ff73e3c1b5e20d026af755f2c36b76d3f389e..d8e18bc6978714693dd0ee9bd6a94601f5570215 100644 --- a/modules/db/src/extract_data_helper.cc +++ b/modules/db/src/extract_data_helper.cc @@ -50,15 +50,15 @@ void ExtractValidPositions(const String& entry_name, const String& chain_name, void ExtractTemplateData(const String& entry_name, const String& chain_name, const ost::seq::AlignmentHandle& aln, - LinearIndexer& indexer, - LinearCharacterContainer& seqres_container, - LinearCharacterContainer& atomseq_container, - LinearPositionContainer& position_container, + LinearIndexerPtr indexer, + LinearCharacterContainerPtr seqres_container, + LinearCharacterContainerPtr atomseq_container, + std::vector<LinearPositionContainerPtr>& position_containers, std::vector<int>& residue_numbers, - geom::Vec3List& ca_positions) { + std::vector<geom::Vec3List>& positions) { - std::pair<uint64_t, uint64_t> data_range = indexer.GetDataRange(entry_name, - chain_name); + std::pair<uint64_t, uint64_t> data_range = indexer->GetDataRange(entry_name, + chain_name); String template_seqres = aln.GetSequence(1).GetGaplessString(); data_range.first += aln.GetSequence(1).GetOffset(); @@ -67,16 +67,19 @@ void ExtractTemplateData(const String& entry_name, const String& chain_name, // check, whether the the template seqres is consistent with what // we find in seqres_container String expected_template_seqres; - seqres_container.GetCharacters(data_range, expected_template_seqres); + seqres_container->GetCharacters(data_range, expected_template_seqres); if(expected_template_seqres != template_seqres) { throw std::runtime_error("Template sequence in input alignment is " "inconsistent with sequence in SEQRES 
container!"); } String template_atomseq; - atomseq_container.GetCharacters(data_range, template_atomseq); - geom::Vec3List extracted_positions; - position_container.GetPositions(data_range, extracted_positions); + atomseq_container->GetCharacters(data_range, template_atomseq); + int n_pos_containers = position_containers.size(); + std::vector<geom::Vec3List> extracted_positions(n_pos_containers); + for(int i = 0; i < n_pos_containers; ++i) { + position_containers[i]->GetPositions(data_range, extracted_positions[i]); + } uint current_rnum = aln.GetSequence(0).GetOffset() + 1; uint current_template_pos = 0; @@ -85,18 +88,21 @@ void ExtractTemplateData(const String& entry_name, const String& chain_name, // prepare output uint template_atomseq_size = template_atomseq.size(); - ca_positions.clear(); + positions.assign(n_pos_containers, geom::Vec3List()); + for(int i = 0; i < n_pos_containers; ++i) { + positions[i].reserve(template_atomseq_size); + } residue_numbers.clear(); - ca_positions.reserve(template_atomseq_size); residue_numbers.reserve(template_atomseq_size); for(int i = 0; i < aln.GetLength(); ++i) { - if(seqres_seq[i] != '-' && template_seq[i] != '-') { if(template_atomseq[current_template_pos] != '-') { // it is aligned and we have a valid position! 
residue_numbers.push_back(current_rnum); - ca_positions.push_back(extracted_positions[current_template_pos]); + for(int j = 0; j < n_pos_containers; ++j) { + positions[j].push_back(extracted_positions[j][current_template_pos]); + } } } diff --git a/modules/db/src/extract_data_helper.hh b/modules/db/src/extract_data_helper.hh index 271d03b3d920bf7c5cd1670d99ceaf9496e13191..b83c9a3154c0fb0ce4f5865b21b9e0c755cee260 100644 --- a/modules/db/src/extract_data_helper.hh +++ b/modules/db/src/extract_data_helper.hh @@ -37,12 +37,12 @@ void ExtractValidPositions(const String& entry_name, const String& chain_name, void ExtractTemplateData(const String& entry_name, const String& chain_name, const ost::seq::AlignmentHandle& aln, - LinearIndexer& indexer, - LinearCharacterContainer& seqres_container, - LinearCharacterContainer& atomseq_container, - LinearPositionContainer& position_container, + LinearIndexerPtr indexer, + LinearCharacterContainerPtr seqres_container, + LinearCharacterContainerPtr atomseq_container, + std::vector<LinearPositionContainerPtr>& position_container, std::vector<int>& residue_numbers, - geom::Vec3List& ca_positions); + std::vector<geom::Vec3List>& positions); }} //ns