Skip to content
Snippets Groups Projects
Commit 18f2f1aa authored by Studer Gabriel's avatar Studer Gabriel
Browse files

Basic AFDB functions used by some experimental code

parent 126af66b
No related branches found
No related tags found
No related merge requests found
......@@ -11,6 +11,7 @@ set(MODELLING_CPP
export_scoring_weights.cc
export_sidechain_reconstructor.cc
export_motif_finder.cc
export_afdb.cc
wrap_modelling.cc
)
......
// Copyright (c) 2013-2023, SIB - Swiss Institute of Bioinformatics and
// Biozentrum - University of Basel
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <boost/python.hpp>
#include <promod3/modelling/afdb.hh>
using namespace ost;
using namespace boost::python;
using namespace promod3::modelling;
namespace {
// Python-facing adapter for SeqToPentamerIndices: the indices are computed
// into a temporary vector and then appended, in order, to the Python list
// *l* that was handed in by the caller.
void WrapSeqToPentamerIndices(const String& sequence, bool unique,
                              boost::python::list& l) {
  std::vector<int> pentamer_indices;
  SeqToPentamerIndices(sequence, unique, pentamer_indices);
  for(const int pentamer_idx : pentamer_indices) {
    l.append(pentamer_idx);
  }
}
}
void export_afdb()
{
def("SeqToPentamerIndices", &WrapSeqToPentamerIndices, (arg("sequence"),
arg("unique"),
arg("result_list")));
def("CreateAFDBIdx", &CreateAFDBIdx, (arg("uniprot_ac"), arg("fragment"),
arg("version")));
}
......@@ -28,6 +28,7 @@ void export_score_container();
void export_scoring_weights();
void export_SidechainReconstructor();
void export_motif_finder();
void export_afdb();
BOOST_PYTHON_MODULE(_modelling)
{
......@@ -43,4 +44,5 @@ BOOST_PYTHON_MODULE(_modelling)
export_scoring_weights();
export_SidechainReconstructor();
export_motif_finder();
export_afdb();
}
......@@ -19,6 +19,7 @@ set(MODELLING_SOURCES
sidechain_reconstructor.cc
sidechain_env_listener.cc
motif_finder.cc
afdb.cc
)
set(MODELLING_HEADERS
......@@ -43,6 +44,7 @@ set(MODELLING_HEADERS
sidechain_env_listener.hh
motif_finder.hh
robin_hood.h
afdb.hh
)
module(NAME modelling
......
// Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
// Biozentrum - University of Basel
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <ost/log.hh>
#include <promod3/modelling/afdb.hh>
namespace{
inline int CharToIdx(char ch) {
switch(ch){
case 'A': return 0;
case 'C': return 1;
case 'D': return 2;
case 'E': return 3;
case 'F': return 4;
case 'G': return 5;
case 'H': return 6;
case 'I': return 7;
case 'K': return 8;
case 'L': return 9;
case 'M': return 10;
case 'N': return 11;
case 'P': return 12;
case 'Q': return 13;
case 'R': return 14;
case 'S': return 15;
case 'T': return 16;
case 'V': return 17;
case 'W': return 18;
case 'Y': return 19;
}
std::stringstream ss;
ss << "nonstandard olc observed: " << ch;
throw ost::Error(ss.str());
}
// Encodes a character as its 1-based offset from 'A' ('A' -> 1, 'B' -> 2, ...).
// A blank is encoded as 0. No validation of ch is performed here.
inline uint64_t AlphaToIdx(char ch) {
  if(ch != ' ') {
    const int offset_from_a = ch - 'A';
    return static_cast<uint64_t>(offset_from_a + 1);
  }
  return 0;
}
// Encodes a character as its 1-based offset from '0' ('0' -> 1, ..., '9' -> 10).
// A blank is encoded as 0. No validation of ch is performed here.
inline uint64_t NumericToIdx(char ch) {
  return (ch == ' ') ? static_cast<uint64_t>(0)
                     : static_cast<uint64_t>(ch - '0' + 1);
}
// Encodes a character into a shared digit/letter numbering:
//   blank      -> 0
//   '0'..'9'   -> 1..10
//   otherwise  -> offset from 'A' plus 11 ('A' -> 11, 'B' -> 12, ...)
// Characters outside of the digit range are not validated here.
inline uint64_t AlphaNumericToIdx(char ch) {
  if(ch == ' ') {
    return 0;
  }
  const bool is_digit = (ch >= '0' && ch <= '9');
  if(is_digit) {
    return static_cast<uint64_t>(ch - '0' + 1);
  }
  return static_cast<uint64_t>(ch - 'A' + 1 + 10);
}
// Returns true if ch is an upper case letter in 'A'..'Z'. A blank is
// additionally accepted if allow_whitespace is set.
inline bool CheckAlpha(char ch, bool allow_whitespace) {
  if(allow_whitespace && ch == ' ') {
    return true;
  }
  return 'A' <= ch && ch <= 'Z';
}
// Returns true if ch is a digit in '0'..'9'. A blank is additionally
// accepted if allow_whitespace is set.
inline bool CheckNumeric(char ch, bool allow_whitespace) {
  const bool is_digit = ('0' <= ch && ch <= '9');
  const bool is_accepted_blank = (allow_whitespace && ch == ' ');
  return is_digit || is_accepted_blank;
}
// Returns true if ch passes CheckAlpha or CheckNumeric, allow_whitespace is
// forwarded to both checks.
inline bool CheckAlphaNumeric(char ch, bool allow_whitespace) {
  if(CheckAlpha(ch, allow_whitespace)) {
    return true;
  }
  return CheckNumeric(ch, allow_whitespace);
}
}
namespace promod3 { namespace modelling {
// Interprets the five characters ptr[0]..ptr[4] as digits of a base 20
// number (ptr[0] being the most significant one) and returns its value.
// The digit value of each character comes from CharToIdx, which throws for
// characters it does not know.
int PentamerToIdx(const char* ptr) {
  int pentamer_idx = 0;
  // Horner scheme: equivalent to c0*20^4 + c1*20^3 + c2*20^2 + c3*20 + c4
  for(int pos = 0; pos < 5; ++pos) {
    pentamer_idx = pentamer_idx * 20 + CharToIdx(ptr[pos]);
  }
  return pentamer_idx;
}
void SeqToPentamerIndices(const String& seq, bool unique, std::vector<int>& indices) {
int N = seq.size() - 4;
indices.resize(N);
for(int i = 0; i < N; ++i) {
indices[i] = PentamerToIdx(&seq[i]);
}
if(unique) {
auto last = std::unique(indices.begin(), indices.end());
indices.erase(last, indices.end());
}
}
uint64_t CreateAFDBIdx(const String& uniprot_ac, int fragment, int version) {
// check if uniprot AC has expected size of 6 or 10
// https://www.uniprot.org/help/accession_numbers
size_t ac_size = uniprot_ac.size();
if(ac_size != 6 && ac_size != 10) {
std::stringstream ss;
ss << "Expect uniprot AC to be of size 6 or 10, got: " << uniprot_ac;
throw ost::Error(ss.str());
}
if(!CheckAlpha(uniprot_ac[0], false)) {
throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
}
if(!CheckNumeric(uniprot_ac[1], false)) {
throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
}
if(!CheckAlphaNumeric(uniprot_ac[2], false)) {
throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
}
if(!CheckAlphaNumeric(uniprot_ac[3], false)) {
throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
}
if(!CheckAlphaNumeric(uniprot_ac[4], false)) {
throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
}
if(!CheckNumeric(uniprot_ac[5], false)) {
throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
}
if(ac_size > 6) {
if(!CheckAlpha(uniprot_ac[6], true)) {
throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
}
if(!CheckAlphaNumeric(uniprot_ac[7], true)) {
throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
}
if(!CheckAlphaNumeric(uniprot_ac[8], true)) {
throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
}
if(!CheckNumeric(uniprot_ac[9], true)) {
throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
}
}
if(fragment < 0 || fragment > 127) {
std::stringstream ss;
ss << "Expect fragment to be in range [0, 127], got: " << fragment;
}
if(version < 0 || version > 31) {
std::stringstream ss;
ss << "Expect version to be in range [0, 31], got: " << version;
}
uint64_t idx = 0;
idx += AlphaToIdx(uniprot_ac[0]);
idx += NumericToIdx(uniprot_ac[1]) << 5;
idx += AlphaNumericToIdx(uniprot_ac[2]) << 9;
idx += AlphaNumericToIdx(uniprot_ac[3]) << 15;
idx += AlphaNumericToIdx(uniprot_ac[4]) << 21;
idx += NumericToIdx(uniprot_ac[5]) << 27;
if(ac_size > 6) {
idx += AlphaToIdx(uniprot_ac[6]) << 31;
idx += AlphaNumericToIdx(uniprot_ac[7]) << 36;
idx += AlphaNumericToIdx(uniprot_ac[8]) << 42;
idx += NumericToIdx(uniprot_ac[9]) << 48;
}
idx += static_cast<uint64_t>(fragment) << 52;
idx += static_cast<uint64_t>(version) << 59;
return idx;
}
}} //ns
// Copyright (c) 2013-2023, SIB - Swiss Institute of Bioinformatics and
// Biozentrum - University of Basel
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PM3_MODELLING_AFDB_HH
#define PM3_MODELLING_AFDB_HH
#include <ost/mol/mol.hh>
#include <promod3/core/message.hh>
namespace promod3 { namespace modelling {
// Returns an integer index for the five characters ptr[0]..ptr[4]. The
// caller must make sure that at least five characters are readable at ptr.
int PentamerToIdx(const char* ptr);
// Fills indices with the pentamer indices (see PentamerToIdx) of the
// windows of five consecutive characters in seq. If unique is set, a
// de-duplication pass is run on the result before returning.
void SeqToPentamerIndices(const String& seq, bool unique,
std::vector<int>& indices);
// Combines a UniProt accession code (expected length: 6 or 10), a fragment
// number (expected range: [0, 127]) and a version number (expected range:
// [0, 31]) into a single 64 bit integer.
uint64_t CreateAFDBIdx(const String& uniprot_ac, int fragment, int version);
}} //ns
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment