Skip to content
Snippets Groups Projects
Commit cdb51fbf authored by BIOPZ-Haas Juergen's avatar BIOPZ-Haas Juergen
Browse files

Parse COMPND records; only the MOL_ID and CHAIN specifications are supported.

parent 40085b19
Branches
Tags
No related merge requests found
......@@ -102,6 +102,83 @@ void PDBReader::Init(const boost::filesystem::path& loc)
hard_end_=false;
}
void PDBReader::ParseCompndEntry (const StringRef& line, int line_num)
{
if (line.size()<20) {
if (profile_.fault_tolerant) {
LOG_WARNING("invalid COMPND record on line " << line_num
<< ": record is too short");
return;
}
std::stringstream ss("invalid COMPND record on line ");
ss << line_num <<": record is too short";
throw IOException(ss.str());
}
if (line.rtrim().size()>71) {
if (profile_.fault_tolerant) {
LOG_WARNING("invalid COMPND record on line " << line_num
<< ": record is too long");
return;
}
std::stringstream ss("invalid COMPND record on line ");
ss << line_num <<": record is too long";
throw IOException(ss.str());
}
StringRef entry=line.substr(11,59);
std::vector<StringRef> fields=entry.split(':');
StringRef key=fields[0].trim();
fields[1]=fields[1].rtrim();
fields[1]=fields[1].substr(0, fields[1].size()-1);
//currently only these are parsed
if (!(IEquals(key, StringRef("OL_ID", 5)))&&
!(IEquals(key, StringRef("MOL_ID", 6)))&&
!(IEquals(key, StringRef("CHAIN", 5)))){
LOG_TRACE("reading COMPND record on line " << line_num<< "is not supported");
return;
}
std::vector<StringRef> chain_list;
std::vector<String> chains;
if ((IEquals(key, StringRef("OL_ID", 5))) ||
(IEquals(key, StringRef("MOL_ID", 6)))) {
mol_id_=fields[1].trim().to_int();
if (mol_id_.first) {
LOG_TRACE("COMPND record on line " << line_num<< " MOL_ID: "<<mol_id_.second);
}
if (!mol_id_.first) {
if (profile_.fault_tolerant) {
return;
}
throw IOException(str(format("invalid COMPND record on line %d")%line_num));
}
}
if (IEquals(key, StringRef("CHAIN", 5))) {
if (!mol_id_.first) {
if (profile_.fault_tolerant) {
return;
}
throw IOException(str(format("invalid COMPND record on line %d, CHAIN must be succeeding MOL_ID ")%line_num));
}
chain_list=fields[1].split(',');
//~ PDBReader::CompndEntry cc;
//~ cc.chains=chains;
//~ cc.mol_id=mol_id_.second;
//~ std::cout << "COMPDNsssssssss: "<<fields[1] << " " << mol_id_.second<<std::endl;
//~ int ii=0;
//~ for (CompndList::const_iterator i=compnds_.begin(); i!=compnds_.end(); ++i, ++ii) {
for (std::vector<StringRef>::const_iterator it = chain_list.begin(); it != chain_list.end(); ++it) {
chains.push_back(it->trim().str());
//~ std::cout << it->str() << " Hoi " << ii <<std::endl;
}
compnds_.push_back(CompndEntry(chains, mol_id_.second));
//~ }
}
}
void PDBReader::ParseSeqRes(const StringRef& line, int line_num)
{
conop::BuilderP builder=conop::Conopology::Instance().GetBuilder("DEFAULT");
......@@ -223,6 +300,17 @@ void PDBReader::Import(mol::EntityHandle& ent,
}
}
break;
case 'C':
case 'c':
if (curr_line.size()<20) {
LOG_TRACE("skipping entry");
continue;
}
if (IEquals(curr_line.substr(0, 6), StringRef("COMPND", 6))) {
LOG_TRACE("processing COMPND entry");
this->ParseCompndEntry(curr_line, line_num_);
}
break;
case 'E':
case 'e':
if (curr_line.size()<3) {
......@@ -319,6 +407,7 @@ void PDBReader::Import(mol::EntityHandle& ent,
<< helix_list_.size() << " helices and "
<< strand_list_.size() << " strands");
this->AssignSecStructure(ent);
this->AssignMolIds(ent);
for (HetList::const_iterator i=hets_.begin(), e=hets_.end(); i!=e; ++i) {
mol::ResidueHandle res=ent.FindResidue(String(1, i->chain), i->num);
if (res.IsValid()) {
......@@ -327,6 +416,17 @@ void PDBReader::Import(mol::EntityHandle& ent,
}
}
/// \brief Tag the chains of \p ent with the MOL_ID collected from COMPND
///     records.
///
/// For every CompndEntry in compnds_, each listed chain name is looked up in
/// \p ent and, when the chain exists, its integer property "molID" is set to
/// the entry's mol_id.
///
/// \param ent  entity whose chains are annotated
void PDBReader::AssignMolIds(mol::EntityHandle ent) {
  for (CompndList::const_iterator compnd=compnds_.begin(), e=compnds_.end();
       compnd!=e; ++compnd) {
    for (std::vector<String>::const_iterator name=compnd->chains.begin(),
         name_end=compnd->chains.end(); name!=name_end; ++name) {
      mol::ChainHandle chain=ent.FindChain(*name);
      // A chain named in COMPND need not exist in the entity (presumably
      // e.g. when only a subset of chains was imported). Guard the handle
      // the same way the HET loop in Import guards its residue lookup.
      if (!chain.IsValid()) {
        continue;
      }
      chain.SetIntProp("molID", compnd->mol_id);
    }
  }
}
void PDBReader::AssignSecStructure(mol::EntityHandle ent)
{
......
......@@ -47,8 +47,14 @@ class DLLEXPORT_OST_IO PDBReader {
char chain;
mol::ResNum num;
};
/// \brief One parsed COMPND CHAIN specification: the chain names together
///     with the MOL_ID they were listed under.
struct CompndEntry {
  /// \param c  chain names belonging to the compound
  /// \param n  the MOL_ID the chains are assigned to
  ///
  /// The vector is taken by const reference so it is copied only once,
  /// into the member.
  CompndEntry(const std::vector<String>& c, int n): chains(c), mol_id(n) {}
  std::vector<String> chains;  ///< chain names listed for this compound
  int mol_id;                  ///< MOL_ID of the compound
};
typedef std::vector<HSEntry> HSList;
typedef std::vector<HetEntry> HetList;
typedef std::vector<CompndEntry> CompndList;
public:
PDBReader(const String& filename, const IOProfile& profile);
PDBReader(const boost::filesystem::path& loc, const IOProfile& profile);
......@@ -57,15 +63,17 @@ public:
bool HasNext();
void Import(mol::EntityHandle& ent,
const String& restrict_chains="");
const String& restrict_chains="");
/// \brief Store \p flag in read_seqres_.
void SetReadSeqRes(bool flag)
{
  read_seqres_=flag;
}
/// \brief Return the current value of read_seqres_.
bool GetReadSeqRes() const
{
  return read_seqres_;
}
/// \brief Return the sequence list held in seqres_.
seq::SequenceList GetSeqRes() const
{
  return seqres_;
}
private:
void ParseSeqRes(const StringRef& line, int line_num);
void ParseCompndEntry(const StringRef& line, int line_num);
void ClearState();
void AssignSecStructure(mol::EntityHandle ent);
void AssignMolIds(mol::EntityHandle ent);
void ParseAndAddAtom(const StringRef& line, int line_num,
mol::EntityHandle& h, const StringRef& record_type);
......@@ -96,6 +104,8 @@ private:
boost::iostreams::filtering_stream<boost::iostreams::input> in_;
String curr_line_;
HetList hets_;
CompndList compnds_;
std::pair <bool, int> mol_id_;
// this needs to be set to true for reading pqr
// file (i.e. pdb formatted file with charges in occupacy
// column, and radii in b-factor column)
......
......@@ -56,6 +56,28 @@ BOOST_AUTO_TEST_CASE(test_pdb_import_handler)
pdbh.Import(eh,"testfiles/pdb/simple.pdb");
}
// Imports testfiles/pdb/compnd.pdb and checks the chain and residue counts,
// then verifies that chain "A" carries the integer property "molID" with
// value 1.
BOOST_AUTO_TEST_CASE(test_parse_compnd_record)
{
  const String pdb_path("testfiles/pdb/compnd.pdb");
  PDBReader compnd_reader(pdb_path, IOProfile());
  mol::EntityHandle entity=mol::CreateEntity();
  compnd_reader.Import(entity);

  // Structure-level sanity checks; abort the test case if they fail.
  BOOST_REQUIRE_EQUAL(entity.GetChainCount(), 28);
  BOOST_REQUIRE_EQUAL(entity.GetResidueCount(), 9273);

  // Chain-level check of the annotation.
  mol::ChainHandle chain_a=entity.FindChain("A");
  BOOST_CHECK(chain_a.HasProp("molID"));
  BOOST_CHECK(1==chain_a.GetIntProp("molID"));
}
//~ BOOST_AUTO_TEST_CASE(test_parse_compnd_missing_comma_chain_record)
//~ {
//~ String fname("testfiles/pdb/compnd_missing_comma_chain_record.pdb");
//~ PDBReader reader(fname, IOProfile());
//~ mol::EntityHandle ent=mol::CreateEntity();
//~ reader.Import(ent);
//~ }
BOOST_AUTO_TEST_CASE(atom_record)
{
String fname("testfiles/pdb/atom.pdb");
......
This diff is collapsed.
HEADER HYDROLASE 12-JUN-01 1JD2
TITLE CRYSTAL STRUCTURE OF THE YEAST 20S PROTEASOME:TMC-95A
TITLE 2 COMPLEX: A NON-COVALENT PROTEASOME INHIBITOR
COMPND MOL_ID: 1;
COMPND 2 MOLECULE: PROTEASOME COMPONENT Y7;
COMPND 3 CHAIN: A V;
COMPND 4 SYNONYM: MACROPAIN SUBUNIT Y7, PROTEINASE YSCE SUBUNIT 7,
COMPND 5 MULTICATALYTIC ENDOPEPTIDASE COMPLEX SUBUNIT Y7;
COMPND 6 EC: 3.4.99.46;
COMPND 7 OTHER_DETAILS: PART OF 20S SUBUNIT;
COMPND 8 MOL_ID: 2;
COMPND 9 MOLECULE: PROTEASOME COMPONENT Y13;
COMPND 10 CHAIN: B, W;
COMPND 11 SYNONYM: MACROPAIN SUBUNIT Y13, PROTEINASE YSCE SUBUNIT 13,
COMPND 12 MULTICATALYTIC ENDOPEPTIDASE COMPLEX SUBUNIT Y13;
COMPND 13 EC: 3.4.99.46;
COMPND 14 OTHER_DETAILS: PART OF 20S SUBUNIT;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment