diff --git a/modules/db/doc/db.rst b/modules/db/doc/db.rst index 87dc0919e840133815ea56af101ca8d9096b2d11..a296c1ec6e5db939085a79dab3563841a38c5b39 100644 --- a/modules/db/doc/db.rst +++ b/modules/db/doc/db.rst @@ -309,14 +309,14 @@ Data Extraction -------------------------------------------------------------------------------- Openstructure provides data extraction functionality for the following scenario: -There is a SEQRES container and an ATOMSEQ container -(both: :class:`LinearCharacterContainer`). -Positions are stored in :class:`LinearPositionContainer`. -The containers contain entries from the protein structure database +There are three binary containers: a position container to hold CA-positions +(:class:`LinearPositionContainer`), a SEQRES container and +an ATOMSEQ container (both: :class:`LinearCharacterContainer`). +They contain entries from the protein structure database and sequence/position data is relative to the SEQRES of those entries. This means, if the SEQRES has more characters as there are resolved residues -in the structure, the entry in the position container(s) still contains the exact -number of SEQRES characters but some positions remain invalid. Thats where the +in the structure, the entry in the position container still contains the exact +number of SEQRES characters but some positions remain invalid. That's where the ATOMSEQ container comes in. It only contains matching residues to the SEQRES but marks non-resolved residues with '-'. @@ -378,16 +378,13 @@ marks non-resolved residues with '-'. 
:type indexer: :class:`LinearIndexer` :type seqres_container: :class:`LinearCharacterContainer` :type atomseq_container: :class:`LinearCharacterContainer` - :type position_container: :class:`LinearPositionContainer` or - :class:`list` of :class:`LinearPositionContainer` + :type position_container: :class:`LinearPositionContainer` :returns: First element: :class:`list` of residue numbers that relate each entry in the second element to the target sequence specified in *aln*. The numbering scheme - starts from one. Second Element: the according positions. - :class:`ost.geom.Vec3List` if *position_container* was a - :class:`LinearPositionContainer`, a list of - :class:`ost.geom.Vec3List` if it was a :class:`list`. + starts from one. Second Element: :class:`geom.Vec3List` + with the according positions. :rtype: :class:`tuple` :raises: :exc:`ost.Error` if requested data is not present in diff --git a/modules/db/pymod/export_linear_db.cc b/modules/db/pymod/export_linear_db.cc index 1cb623cb5b8644d6af0dbd939ca84fc8d44a31c0..597f4243e1f447d47d39f771b12a09a56e6c5e57 100644 --- a/modules/db/pymod/export_linear_db.cc +++ b/modules/db/pymod/export_linear_db.cc @@ -148,50 +148,24 @@ void WrapGetPositions(LinearPositionContainerPtr container, } -tuple WrapExtractTemplateDataSingle(const String& entry_name, const String& chain_name, - const ost::seq::AlignmentHandle& aln, - LinearIndexerPtr indexer, - LinearCharacterContainerPtr seqres_container, - LinearCharacterContainerPtr atomseq_container, - LinearPositionContainerPtr position_container) { - - std::vector<LinearPositionContainerPtr> position_containers; - position_containers.push_back(position_container); - std::vector<int> v_residue_numbers; - std::vector<geom::Vec3List> position_vec; +tuple WrapExtractTemplateData(const String& entry_name, const String& chain_name, + const ost::seq::AlignmentHandle& aln, + LinearIndexer& indexer, + LinearCharacterContainer& seqres_container, + LinearCharacterContainer& atomseq_container, + 
LinearPositionContainer& position_container) { - ost::db::ExtractTemplateData(entry_name, chain_name, aln, indexer, - seqres_container, atomseq_container, - position_containers, v_residue_numbers, - position_vec); - - list residue_numbers; - VecToList(v_residue_numbers, residue_numbers); - return boost::python::make_tuple(residue_numbers, position_vec[0]); -} - -tuple WrapExtractTemplateDataList(const String& entry_name, const String& chain_name, - const ost::seq::AlignmentHandle& aln, - LinearIndexerPtr indexer, - LinearCharacterContainerPtr seqres_container, - LinearCharacterContainerPtr atomseq_container, - boost::python::list& position_containers) { - - std::vector<LinearPositionContainerPtr> v_position_containers; - ListToVec(position_containers, v_position_containers); std::vector<int> v_residue_numbers; - std::vector<geom::Vec3List> position_vec; + geom::Vec3List ca_positions; ost::db::ExtractTemplateData(entry_name, chain_name, aln, indexer, seqres_container, atomseq_container, - v_position_containers, v_residue_numbers, - position_vec); + position_container, v_residue_numbers, + ca_positions); list residue_numbers; - list position_list; VecToList(v_residue_numbers, residue_numbers); - VecToList(position_vec, position_list); - return boost::python::make_tuple(residue_numbers, position_list); + return boost::python::make_tuple(residue_numbers, ca_positions); } } @@ -252,20 +226,13 @@ void export_linear_db() { arg("position_container"), arg("seq"), arg("positions"))); - def("ExtractTemplateData", &WrapExtractTemplateDataSingle, (arg("entry_name"), - arg("chain_name"), - arg("aln"), - arg("linear_indexer"), - arg("seqres_container"), - arg("atomseq_container"), - arg("position_container"))); - - def("ExtractTemplateData", &WrapExtractTemplateDataList, (arg("entry_name"), - arg("chain_name"), - arg("aln"), - arg("linear_indexer"), - arg("seqres_container"), - arg("atomseq_container"), - arg("position_containers"))); + def("ExtractTemplateData", 
&WrapExtractTemplateData, (arg("entry_name"), + arg("chain_name"), + arg("aln"), + arg("linear_indexer"), + arg("seqres_container"), + arg("atomseq_container"), + arg("position_container"))); + } diff --git a/modules/db/src/extract_data_helper.cc b/modules/db/src/extract_data_helper.cc index d8e18bc6978714693dd0ee9bd6a94601f5570215..f95ff73e3c1b5e20d026af755f2c36b76d3f389e 100644 --- a/modules/db/src/extract_data_helper.cc +++ b/modules/db/src/extract_data_helper.cc @@ -50,15 +50,15 @@ void ExtractValidPositions(const String& entry_name, const String& chain_name, void ExtractTemplateData(const String& entry_name, const String& chain_name, const ost::seq::AlignmentHandle& aln, - LinearIndexerPtr indexer, - LinearCharacterContainerPtr seqres_container, - LinearCharacterContainerPtr atomseq_container, - std::vector<LinearPositionContainerPtr>& position_containers, + LinearIndexer& indexer, + LinearCharacterContainer& seqres_container, + LinearCharacterContainer& atomseq_container, + LinearPositionContainer& position_container, std::vector<int>& residue_numbers, - std::vector<geom::Vec3List>& positions) { + geom::Vec3List& ca_positions) { - std::pair<uint64_t, uint64_t> data_range = indexer->GetDataRange(entry_name, - chain_name); + std::pair<uint64_t, uint64_t> data_range = indexer.GetDataRange(entry_name, + chain_name); String template_seqres = aln.GetSequence(1).GetGaplessString(); data_range.first += aln.GetSequence(1).GetOffset(); @@ -67,19 +67,16 @@ void ExtractTemplateData(const String& entry_name, const String& chain_name, // check, whether the the template seqres is consistent with what // we find in seqres_container String expected_template_seqres; - seqres_container->GetCharacters(data_range, expected_template_seqres); + seqres_container.GetCharacters(data_range, expected_template_seqres); if(expected_template_seqres != template_seqres) { throw std::runtime_error("Template sequence in input alignment is " "inconsistent with sequence in SEQRES container!"); } 
String template_atomseq; - atomseq_container->GetCharacters(data_range, template_atomseq); - int n_pos_containers = position_containers.size(); - std::vector<geom::Vec3List> extracted_positions(n_pos_containers); - for(int i = 0; i < n_pos_containers; ++i) { - position_containers[i]->GetPositions(data_range, extracted_positions[i]); - } + atomseq_container.GetCharacters(data_range, template_atomseq); + geom::Vec3List extracted_positions; + position_container.GetPositions(data_range, extracted_positions); uint current_rnum = aln.GetSequence(0).GetOffset() + 1; uint current_template_pos = 0; @@ -88,21 +85,18 @@ void ExtractTemplateData(const String& entry_name, const String& chain_name, // prepare output uint template_atomseq_size = template_atomseq.size(); - positions.assign(n_pos_containers, geom::Vec3List()); - for(int i = 0; i < n_pos_containers; ++i) { - positions[i].reserve(template_atomseq_size); - } + ca_positions.clear(); residue_numbers.clear(); + ca_positions.reserve(template_atomseq_size); residue_numbers.reserve(template_atomseq_size); for(int i = 0; i < aln.GetLength(); ++i) { + if(seqres_seq[i] != '-' && template_seq[i] != '-') { if(template_atomseq[current_template_pos] != '-') { // it is aligned and we have a valid position! 
residue_numbers.push_back(current_rnum); - for(int j = 0; j < n_pos_containers; ++j) { - positions[j].push_back(extracted_positions[j][current_template_pos]); - } + ca_positions.push_back(extracted_positions[current_template_pos]); } } diff --git a/modules/db/src/extract_data_helper.hh b/modules/db/src/extract_data_helper.hh index b83c9a3154c0fb0ce4f5865b21b9e0c755cee260..271d03b3d920bf7c5cd1670d99ceaf9496e13191 100644 --- a/modules/db/src/extract_data_helper.hh +++ b/modules/db/src/extract_data_helper.hh @@ -37,12 +37,12 @@ void ExtractValidPositions(const String& entry_name, const String& chain_name, void ExtractTemplateData(const String& entry_name, const String& chain_name, const ost::seq::AlignmentHandle& aln, - LinearIndexerPtr indexer, - LinearCharacterContainerPtr seqres_container, - LinearCharacterContainerPtr atomseq_container, - std::vector<LinearPositionContainerPtr>& position_container, + LinearIndexer& indexer, + LinearCharacterContainer& seqres_container, + LinearCharacterContainer& atomseq_container, + LinearPositionContainer& position_container, std::vector<int>& residue_numbers, - std::vector<geom::Vec3List>& positions); + geom::Vec3List& ca_positions); }} //ns