Skip to content
Snippets Groups Projects
Commit e93d9378 authored by Studer Gabriel's avatar Studer Gabriel
Browse files

allow to extract template info from several LinearPositionContainer at once

parent 5c4be5cf
No related branches found
No related tags found
No related merge requests found
......@@ -309,14 +309,14 @@ Data Extraction
--------------------------------------------------------------------------------
Openstructure provides data extraction functionality for the following scenario:
There are three binary container. A position container to hold CA-positions
(:class:`LinearPositionContainer`), a SEQRES container and
an ATOMSEQ container (both: :class:`LinearCharacterContainer`).
They contain entries from the protein structure database
There is a SEQRES container and an ATOMSEQ container
(both: :class:`LinearCharacterContainer`).
Positions are stored in :class:`LinearPositionContainer`.
The containers contain entries from the protein structure database
and sequence/position data is relative to the SEQRES of those entries.
This means, if the SEQRES has more characters than there are resolved residues
in the structure, the entry in the position container still contains the exact
number of SEQRES characters but some position remain invalid. Thats where the
in the structure, the entry in the position container(s) still contains the exact
number of SEQRES characters but some positions remain invalid. That's where the
ATOMSEQ container comes in. It only contains matching residues to the SEQRES but
marks non-resolved residues with '-'.
......@@ -378,13 +378,16 @@ marks non-resolved residues with '-'.
:type indexer: :class:`LinearIndexer`
:type seqres_container: :class:`LinearCharacterContainer`
:type atomseq_container: :class:`LinearCharacterContainer`
:type position_container: :class:`LinearPositionContainer`
:type position_container: :class:`LinearPositionContainer` or
:class:`list` of :class:`LinearPositionContainer`
:returns: First element: :class:`list` of residue numbers that
relate each entry in the second element to the target
sequence specified in *aln*. The numbering scheme
starts from one. Second Element: :class:`geom.Vec3List`
with the according positions.
starts from one. Second element: the corresponding positions.
:class:`ost.geom.Vec3List` if *position_container* was a
:class:`LinearPositionContainer`, a list of
:class:`ost.geom.Vec3List` if it was a :class:`list`.
:rtype: :class:`tuple`
:raises: :exc:`ost.Error` if requested data is not present in
......
......@@ -148,24 +148,50 @@ void WrapGetPositions(LinearPositionContainerPtr container,
}
tuple WrapExtractTemplateData(const String& entry_name, const String& chain_name,
const ost::seq::AlignmentHandle& aln,
LinearIndexer& indexer,
LinearCharacterContainer& seqres_container,
LinearCharacterContainer& atomseq_container,
LinearPositionContainer& position_container) {
// Python-facing wrapper around ost::db::ExtractTemplateData for the case where
// the caller passes exactly one position container.
//
// The underlying routine works on a vector of position containers and fills a
// vector of Vec3Lists, so the single container is wrapped into a one-element
// vector and the first extracted Vec3List is handed back.
//
// Returns a Python tuple: (list of residue numbers, geom::Vec3List).
// Exceptions raised by ost::db::ExtractTemplateData are not caught here.
tuple WrapExtractTemplateDataSingle(const String& entry_name, const String& chain_name,
                                    const ost::seq::AlignmentHandle& aln,
                                    LinearIndexerPtr indexer,
                                    LinearCharacterContainerPtr seqres_container,
                                    LinearCharacterContainerPtr atomseq_container,
                                    LinearPositionContainerPtr position_container) {
  // adapt the single container to the vector-based interface
  std::vector<LinearPositionContainerPtr> single_container(1, position_container);

  // output buffers filled by the extraction routine
  std::vector<int> rnums;
  std::vector<geom::Vec3List> extracted;
  ost::db::ExtractTemplateData(entry_name, chain_name, aln, indexer,
                               seqres_container, atomseq_container,
                               single_container, rnums, extracted);

  // convert the residue numbers to a Python list
  list py_rnums;
  VecToList(rnums, py_rnums);

  // one container went in, so the positions of interest sit at index 0
  return boost::python::make_tuple(py_rnums, extracted[0]);
}
tuple WrapExtractTemplateDataList(const String& entry_name, const String& chain_name,
const ost::seq::AlignmentHandle& aln,
LinearIndexerPtr indexer,
LinearCharacterContainerPtr seqres_container,
LinearCharacterContainerPtr atomseq_container,
boost::python::list& position_containers) {
std::vector<LinearPositionContainerPtr> v_position_containers;
ListToVec(position_containers, v_position_containers);
std::vector<int> v_residue_numbers;
geom::Vec3List ca_positions;
std::vector<geom::Vec3List> position_vec;
ost::db::ExtractTemplateData(entry_name, chain_name, aln, indexer,
seqres_container, atomseq_container,
position_container, v_residue_numbers,
ca_positions);
v_position_containers, v_residue_numbers,
position_vec);
list residue_numbers;
list position_list;
VecToList(v_residue_numbers, residue_numbers);
return boost::python::make_tuple(residue_numbers, ca_positions);
VecToList(position_vec, position_list);
return boost::python::make_tuple(residue_numbers, position_list);
}
}
......@@ -226,13 +252,20 @@ void export_linear_db() {
arg("position_container"),
arg("seq"), arg("positions")));
def("ExtractTemplateData", &WrapExtractTemplateData, (arg("entry_name"),
arg("chain_name"),
arg("aln"),
arg("linear_indexer"),
arg("seqres_container"),
arg("atomseq_container"),
arg("position_container")));
def("ExtractTemplateData", &WrapExtractTemplateDataSingle, (arg("entry_name"),
arg("chain_name"),
arg("aln"),
arg("linear_indexer"),
arg("seqres_container"),
arg("atomseq_container"),
arg("position_container")));
def("ExtractTemplateData", &WrapExtractTemplateDataList, (arg("entry_name"),
arg("chain_name"),
arg("aln"),
arg("linear_indexer"),
arg("seqres_container"),
arg("atomseq_container"),
arg("position_containers")));
}
......@@ -50,15 +50,15 @@ void ExtractValidPositions(const String& entry_name, const String& chain_name,
void ExtractTemplateData(const String& entry_name, const String& chain_name,
const ost::seq::AlignmentHandle& aln,
LinearIndexer& indexer,
LinearCharacterContainer& seqres_container,
LinearCharacterContainer& atomseq_container,
LinearPositionContainer& position_container,
LinearIndexerPtr indexer,
LinearCharacterContainerPtr seqres_container,
LinearCharacterContainerPtr atomseq_container,
std::vector<LinearPositionContainerPtr>& position_containers,
std::vector<int>& residue_numbers,
geom::Vec3List& ca_positions) {
std::vector<geom::Vec3List>& positions) {
std::pair<uint64_t, uint64_t> data_range = indexer.GetDataRange(entry_name,
chain_name);
std::pair<uint64_t, uint64_t> data_range = indexer->GetDataRange(entry_name,
chain_name);
String template_seqres = aln.GetSequence(1).GetGaplessString();
data_range.first += aln.GetSequence(1).GetOffset();
......@@ -67,16 +67,19 @@ void ExtractTemplateData(const String& entry_name, const String& chain_name,
// check whether the template seqres is consistent with what
// we find in seqres_container
String expected_template_seqres;
seqres_container.GetCharacters(data_range, expected_template_seqres);
seqres_container->GetCharacters(data_range, expected_template_seqres);
if(expected_template_seqres != template_seqres) {
throw std::runtime_error("Template sequence in input alignment is "
"inconsistent with sequence in SEQRES container!");
}
String template_atomseq;
atomseq_container.GetCharacters(data_range, template_atomseq);
geom::Vec3List extracted_positions;
position_container.GetPositions(data_range, extracted_positions);
atomseq_container->GetCharacters(data_range, template_atomseq);
int n_pos_containers = position_containers.size();
std::vector<geom::Vec3List> extracted_positions(n_pos_containers);
for(int i = 0; i < n_pos_containers; ++i) {
position_containers[i]->GetPositions(data_range, extracted_positions[i]);
}
uint current_rnum = aln.GetSequence(0).GetOffset() + 1;
uint current_template_pos = 0;
......@@ -85,18 +88,21 @@ void ExtractTemplateData(const String& entry_name, const String& chain_name,
// prepare output
uint template_atomseq_size = template_atomseq.size();
ca_positions.clear();
positions.assign(n_pos_containers, geom::Vec3List());
for(int i = 0; i < n_pos_containers; ++i) {
positions[i].reserve(template_atomseq_size);
}
residue_numbers.clear();
ca_positions.reserve(template_atomseq_size);
residue_numbers.reserve(template_atomseq_size);
for(int i = 0; i < aln.GetLength(); ++i) {
if(seqres_seq[i] != '-' && template_seq[i] != '-') {
if(template_atomseq[current_template_pos] != '-') {
// it is aligned and we have a valid position!
residue_numbers.push_back(current_rnum);
ca_positions.push_back(extracted_positions[current_template_pos]);
for(int j = 0; j < n_pos_containers; ++j) {
positions[j].push_back(extracted_positions[j][current_template_pos]);
}
}
}
......
......@@ -37,12 +37,12 @@ void ExtractValidPositions(const String& entry_name, const String& chain_name,
void ExtractTemplateData(const String& entry_name, const String& chain_name,
const ost::seq::AlignmentHandle& aln,
LinearIndexer& indexer,
LinearCharacterContainer& seqres_container,
LinearCharacterContainer& atomseq_container,
LinearPositionContainer& position_container,
LinearIndexerPtr indexer,
LinearCharacterContainerPtr seqres_container,
LinearCharacterContainerPtr atomseq_container,
std::vector<LinearPositionContainerPtr>& position_container,
std::vector<int>& residue_numbers,
geom::Vec3List& ca_positions);
std::vector<geom::Vec3List>& positions);
}} //ns
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment