Something went wrong on our end
-
BIOPZ-Haas Juergen authored
silencing warnings during testing; swapping std::cout for LOG_WARNING when there is a chain with no MOL_ID assigned
BIOPZ-Haas Juergen authoredsilencing warnings during testing; swapping std::cout for LOG_WARNING when there is a chain with no MOL_ID assigned
pdb_reader.cc 29.10 KiB
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2011 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/filesystem/convenience.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <boost/format.hpp>
#include <boost/lexical_cast.hpp>
#include <ost/dyn_cast.hh>
#include <ost/profile.hh>
#include <ost/log.hh>
#include <ost/message.hh>
#include <ost/conop/conop.hh>
#include <ost/conop/rule_based_builder.hh>
#include <ost/geom/mat3.hh>
#include <ost/io/io_exception.hh>
#include "pdb_reader.hh"
namespace ost { namespace io {
using boost::format;
namespace {
bool IEquals(const StringRef& a, const StringRef& b)
{
if (a.size()!=b.size()) {
return false;
}
for (size_t i=0; i<a.size(); ++i) {
if (toupper(a[i])!=b[i]) {
return false;
}
}
return true;
}
mol::ResNum to_res_num(int num, char ins_code)
{
return mol::ResNum(num, ins_code==' ' ? '\0' : ins_code);
}
}
PDBReader::PDBReader(std::istream& instream, const IOProfile& profile):
infile_(), instream_(instream), compnds_(), profile_(profile)
{
this->Init(boost::filesystem::path(""));
}
PDBReader::PDBReader(const String& filename, const IOProfile& profile)
: infile_(filename), instream_(infile_), compnds_(), profile_(profile)
{
this->Init(boost::filesystem::path(filename));
}
PDBReader::PDBReader(const boost::filesystem::path& loc,
const IOProfile& profile):
infile_(loc), instream_(infile_), compnds_(), profile_(profile)
{
this->Init(loc);
}
void PDBReader::Init(const boost::filesystem::path& loc)
{
warned_name_mismatch_=false;
read_seqres_=false;
warned_rule_based_=false;
charmm_style_=profile_.dialect=="CHARMM";
num_model_records_=0;
if (boost::iequals(".gz", boost::filesystem::extension(loc))) {
in_.push(boost::iostreams::gzip_decompressor());
}
in_.push(instream_);
if(!infile_) throw IOException("could not open "+loc.string());
line_num_=0;
if(boost::iequals(boost::filesystem::extension(loc), ".pqr")) {
is_pqr_=true;
} else {
is_pqr_=false;
}
hard_end_=false;
skip_next_=false;
data_continues_=false;
old_key_="";
mol_id_=std::make_pair(false, 0);
}
void PDBReader::ThrowFaultTolerant(const String& msg) {
if (profile_.fault_tolerant) {
LOG_WARNING(msg);
return;
}
throw IOException(msg);
}
void PDBReader::ParseCompndEntry (const StringRef& line, int line_num)
{
if (line.size()<12) {
if (profile_.fault_tolerant) {
LOG_WARNING("invalid COMPND record on line " << line_num
<< ": record is too short");
return;
}
std::stringstream ss("invalid COMPND record on line ");
ss << line_num <<": record is too short";
throw IOException(ss.str());
}
if (line.rtrim().size()>80) {
if (profile_.fault_tolerant) {
LOG_WARNING("invalid COMPND record on line " << line_num
<< ": record is too long");
return;
}
std::stringstream ss("invalid COMPND record on line ");
ss << line_num <<": whole record is too long";
throw IOException(ss.str());
}
StringRef entry=line.substr(10,line.size()-10).trim();
StringRef data, key;
if (entry.size()<2){
ThrowFaultTolerant(str(format("invalid COMPND record on line %d, compnd record too small")%line_num));
}
char last_letter=entry[entry.size()-1];
if((last_letter==',') ||
(last_letter==';')) {
entry=entry.substr(0, entry.size()-1); //trim last char
}
if((entry.find(':')!=entry.end())){
std::vector<StringRef> fields=entry.split(':');
key=fields[0].trim();
old_key_=key.str();
if (fields.size()>1) {
data=fields[1].trim();
}
if(data.size()<1){
if (!(key.str()=="MOL_ID")&&!(key.str()=="CHAIN")){
LOG_WARNING("skipping unsupported COMPND record on line " << line_num<< ": record value"<< key.str()<<" too small");
if (data_continues_) {
skip_next_=true;
} else {
return;
}
}
ThrowFaultTolerant(str(format("invalid COMPND record on line %d, record after ':' too small")%line_num));
}
data_continues_=true;
if (last_letter==';') {
data_continues_=false;
}
} else if(skip_next_) {
if (last_letter==';'){
skip_next_=false;
data_continues_=false;
old_key_="";
} else if (last_letter==','){
data_continues_=true;
}
return;
} else if (data_continues_){
data=entry.trim();
if(data.size()<1){
ThrowFaultTolerant(str(format("invalid COMPND record on line %d, record after\
':' was empty")%line_num));
}
key=StringRef(old_key_.data(), old_key_.size());
}
//currently only these are parsed
if (!(key.str()=="MOL_ID")&&!(key.str()=="CHAIN")){
LOG_INFO("reading COMPND record on line " << line_num<< "is not supported");
if (data_continues_) {
skip_next_=true;
} else {
return;
}
}
std::vector<StringRef> chain_list;
std::vector<String> chains;
if ((IEquals(key, StringRef("MOL_ID", 6)))) {
mol_id_=data.trim().to_int();
if (mol_id_.first) {
LOG_TRACE("COMPND record on line " << line_num<< " MOL_ID: "<<mol_id_.second);
}
if (!mol_id_.first) {
ThrowFaultTolerant(str(format("invalid COMPND record on line %d")%line_num));
}
}
if (IEquals(key, StringRef("CHAIN", 5))) {
if (!mol_id_.first) {
ThrowFaultTolerant(str(format("invalid COMPND record on line %d, CHAIN must be succeeding MOL_ID ")%line_num));
}
if (data.find(',')!=data.end()) {
chain_list=data.split(',');
} else {
if(data.size()==1){
chain_list.push_back(data);
} else {
ThrowFaultTolerant(str(format("invalid COMPND record on line %d, CHAIN must be succeeding MOL_ID ")%line_num));
}
}
for (std::vector<StringRef>::const_iterator it = chain_list.begin(); it != chain_list.end(); ++it) {
chains.push_back(it->trim().str());
}
compnds_.push_back(CompndEntry(chains, mol_id_.second));
}
}
void PDBReader::ParseSeqRes(const StringRef& line, int line_num)
{
conop::BuilderP builder=conop::Conopology::Instance().GetBuilder("DEFAULT");
conop::RuleBasedBuilderPtr rbb=dyn_cast<conop::RuleBasedBuilder>(builder);
if (!rbb) {
if (!warned_rule_based_) {
LOG_WARNING("SEQRES import requires the rule-based builder. Ignoring "
"SEQRES records");
}
warned_rule_based_=true;
return;
}
conop::CompoundLibPtr comp_lib=rbb->GetCompoundLib();
if (!seqres_.IsValid()) {
seqres_=seq::CreateSequenceList();
}
if (line.size()<17) {
if (profile_.fault_tolerant) {
LOG_WARNING("invalid SEQRES record on line " << line_num
<< ": record is too short");
return;
}
std::stringstream ss("invalid SEQRES record on line ");
ss << line_num <<": record is too short";
throw IOException(ss.str());
}
String chain(1, line[11]);
seq::SequenceHandle curr_seq;
if (seqres_.GetCount()==0 ||
seqres_[seqres_.GetCount()-1].GetName()!=chain) {
curr_seq=seq::CreateSequence(chain, "");
seqres_.AddSequence(curr_seq);
} else {
curr_seq=seqres_[seqres_.GetCount()-1];
}
for (int i=0; i<14; ++i) {
size_t start=19+i*4;
if (line.size()<start+3) {
return;
}
StringRef rname=line.substr(start, 3);
StringRef trimmed=rname.trim();
if (trimmed.empty()) {
return;
}
conop::CompoundPtr compound=comp_lib->FindCompound(trimmed.str(),
conop::Compound::PDB);
if (!compound) {
if (rname!=StringRef("UNK", 3)) {
LOG_WARNING("unknown residue '" << trimmed << "' in SEQRES record. "
"Setting one-letter-code to '?'");
}
curr_seq.Append('?');
continue;
}
curr_seq.Append(compound->GetOneLetterCode());
}
}
bool PDBReader::HasNext()
{
if (hard_end_) {
return false;
}
// since helix and sheet have to appear before any atom/hetatm records
// a HELIX/SHEET entry implies a next model.
while (std::getline(in_, curr_line_) && ++line_num_) {
StringRef curr_line(curr_line_.c_str(), curr_line_.length());
if (curr_line.size()>5 &&
(IEquals(curr_line.substr(0, 6), StringRef("ATOM ", 6)) ||
(!profile_.no_hetatms &&
IEquals(curr_line.substr(0, 6),StringRef("HETATM ", 6))) ||
IEquals(curr_line.substr(0, 6),StringRef("ANISOU ", 6)) ||
IEquals(curr_line.substr(0, 6), StringRef("SHEET ", 6)) ||
IEquals(curr_line.substr(0, 6), StringRef("HELIX ", 6)) ||
IEquals(curr_line.substr(0, 6), StringRef("COMPND", 6)) ||
IEquals(curr_line.substr(0, 6), StringRef("MODEL ", 6)) ||
IEquals(curr_line.substr(0, 6), StringRef("SEQRES", 6)) ||
IEquals(curr_line.substr(0, 6), StringRef("HET ", 6)))) {
return true;
} else if (IEquals(curr_line.rtrim(), StringRef("END", 3))) {
hard_end_=true;
return false;
}
}
return false;
}
void PDBReader::Import(mol::EntityHandle& ent,
const String& restrict_chains)
{
LOG_DEBUG("PDBReader: " << profile_);
Profile profile_import("PDBReader::Import");
this->ClearState();
// first read curr_line and then read next...
restrict_chains_=restrict_chains;
bool go_on=true;
do {
if (curr_line_.empty()) {
continue;
}
StringRef curr_line(curr_line_.c_str(), curr_line_.size());
switch(curr_line[0]) {
case 'A':
case 'a':
if (curr_line.size()<6) {
continue;
}
if (IEquals(curr_line.substr(0, 6), StringRef("ATOM ", 6))) {
LOG_TRACE("processing ATOM entry");
this->ParseAndAddAtom(curr_line, line_num_, ent,
StringRef("ATOM", 4));
} else if (IEquals(curr_line.substr(0, 6), StringRef("ANISOU", 6))) {
if (!charmm_style_) {
LOG_TRACE("processing ANISOU entry");
this->ParseAnisou(curr_line, line_num_, ent);
}
}
break;
case 'C':
case 'c':
if (curr_line.size()<20) {
LOG_TRACE("skipping entry");
continue;
}
if (IEquals(curr_line.substr(0, 6), StringRef("COMPND", 6))) {
LOG_TRACE("processing COMPND entry");
this->ParseCompndEntry(curr_line, line_num_);
}
break;
case 'E':
case 'e':
if (curr_line.size()<3) {
continue;
}
if (IEquals(curr_line.rtrim(), StringRef("END", 3))) {
hard_end_=true;
go_on=false;
break;
}
if (curr_line.size()<6) {
continue;
}
if (IEquals(curr_line.substr(0, 6), StringRef("ENDMDL", 6))) {
go_on=false;
num_model_records_=0;
break;
}
case 'H':
case 'h':
if (curr_line.size()<6) {
continue;
}
if (IEquals(curr_line.substr(0, 6), StringRef("HETATM", 6))) {
if (profile_.no_hetatms)
continue;
LOG_TRACE("processing HETATM entry");
this->ParseAndAddAtom(curr_line, line_num_, ent,
StringRef("HETATM", 6));
} else if (IEquals(curr_line.substr(0, 6), StringRef("HELIX ", 6))) {
if (!charmm_style_) {
this->ParseHelixEntry(curr_line);
}
} else if (IEquals(curr_line.substr(0, 6), StringRef("HET ", 6))) {
// remember het entry to mark the residues as ligand during ATOM import
char chain=curr_line[12];
std::pair<bool, int> num=curr_line.substr(13, 4).ltrim().to_int();
if (!num.first) {
if (profile_.fault_tolerant) {
LOG_WARNING("Invalid HET entry on line " << line_num_);
continue;
} else {
String msg=str(format("Invalid HET entry on line %d")%line_num_);
throw IOException(msg);
}
}
hets_.push_back(HetEntry(chain, to_res_num(num.second,
curr_line[17])));
}
break;
case 'M':
case 'm':
if (curr_line.size()<6) {
continue;
}
if (IEquals(curr_line.substr(0, 6), StringRef("MODEL ", 6))) {
++num_model_records_;
if (num_model_records_<2) {
continue;
}
if (profile_.fault_tolerant) {
go_on=false;
num_model_records_=1;
break;
}
String msg=str(format("MODEL record without matching ENDMDL on line %d")%line_num_);
throw IOException(msg);
}
break;
case 'S':
case 's':
if (curr_line.size()<6) {
continue;
}
if (IEquals(curr_line.substr(0, 6), StringRef("SHEET ", 6))) {
if (!charmm_style_) {
this->ParseStrandEntry(curr_line);
}
}
if (IEquals(curr_line.substr(0, 6), StringRef("SEQRES", 6))) {
if (read_seqres_) {
this->ParseSeqRes(curr_line, line_num_);
}
}
break;
default:
break;
}
} while (std::getline(in_, curr_line_) && ++line_num_ && go_on);
LOG_INFO("imported "
<< chain_count_ << " chains, "
<< residue_count_ << " residues, "
<< atom_count_ << " atoms; with "
<< helix_list_.size() << " helices and "
<< strand_list_.size() << " strands");
this->AssignSecStructure(ent);
this->AssignMolIds(ent);
for (HetList::const_iterator i=hets_.begin(), e=hets_.end(); i!=e; ++i) {
mol::ResidueHandle res=ent.FindResidue(String(1, i->chain), i->num);
if (res.IsValid()) {
res.SetIsLigand(true);
}
}
}
void PDBReader::AssignMolIds(mol::EntityHandle ent) {
LOG_INFO("Assigning MOL_IDs");
for (CompndList::const_iterator compnd_iterator=compnds_.begin(), e=compnds_.end();
compnd_iterator!=e; ++compnd_iterator) {
for (std::vector<String>::const_iterator chain_iterator = compnd_iterator->chains.begin();
chain_iterator!= compnd_iterator->chains.end();
++chain_iterator) {
if (restrict_chains_.size()==0 ||
(restrict_chains_.find(*chain_iterator)!=String::npos)) {
mol::ChainHandle chain=ent.FindChain(*chain_iterator);
if (chain) {
chain.SetIntProp("mol_id", compnd_iterator->mol_id);
}else{
LOG_WARNING("failed to assign MOL_ID to chain: "<<*chain_iterator <<std::endl);
std::stringstream ss;
ss << "could not map COMPND record MOL_ID onto chain";
ss <<*chain_iterator;
ThrowFaultTolerant(ss.str());
}
}
}
}
if (compnds_.size()>0){
mol::ChainHandleList ch_list=ent.GetChainList();
for (mol::ChainHandleList::const_iterator chain=ch_list.begin();
chain!=ch_list.end(); ++chain) {
//~ skip HETATM only chains!!
if(chain->IsValid()){
if (!chain->HasProp("mol_id")) {
std::stringstream ss;
ss << "found chain without MOL_ID: ";
ss << chain->GetName();
LOG_WARNING(ss.str());
}
}
}
}
}
void PDBReader::AssignSecStructure(mol::EntityHandle ent)
{
// and now for the fun part, assigning the strands/helices to residues
for (HSList::const_iterator i=helix_list_.begin(), e=helix_list_.end();
i!=e; ++i) {
mol::ChainHandle chain=ent.FindChain(i->chain);
if (!chain.IsValid()) {
LOG_INFO("ignoring helix record for unknown chain "+i->chain);
continue;
}
mol::SecStructure alpha(mol::SecStructure::ALPHA_HELIX);
// some PDB files contain helix/strand entries that are adjacent to each
// other. To avoid visual artifacts, we effectively shorten the first of the
// two secondary structure segments to insert one residue of coil
// conformation.
mol::ResNum start=i->start, end=i->end;
if (helix_list_.end()!=i+1 && (*(i+1)).start.GetNum()<=end.GetNum()+1 &&
(*(i+1)).end.GetNum()>end.GetNum()) {
end=mol::ResNum((*(i+1)).start.GetNum()-2);
}
chain.AssignSecondaryStructure(alpha, start, end);
}
for (HSList::const_iterator i=strand_list_.begin(), e=strand_list_.end();
i!=e; ++i) {
mol::ChainHandle chain=ent.FindChain(i->chain);
if (!chain.IsValid()) {
LOG_INFO("ignoring strand record for unknown chain "+i->chain);
continue;
}
mol::SecStructure extended(mol::SecStructure::EXTENDED);
mol::ResNum start=i->start, end=i->end;
// see comment for helix assignment
if (strand_list_.end()!=i+1 && (*(i+1)).start.GetNum()<=end.GetNum()+1 &&
(*(i+1)).end.GetNum()>end.GetNum()) {
end=mol::ResNum((*(i+1)).start.GetNum()-2);
}
chain.AssignSecondaryStructure(extended, start, end);
}
}
void PDBReader::ClearState()
{
curr_chain_=mol::ChainHandle();
curr_residue_=mol::ResidueHandle();
seqres_=seq::SequenceList();
chain_count_=0;
residue_count_=0;
atom_count_=0;
hard_end_=false;
helix_list_.clear();
strand_list_.clear();
hets_.clear();
}
bool PDBReader::EnsureLineLength(const StringRef& line, size_t size)
{
if (line.length()<size) {
if (profile_.fault_tolerant) {
return false;
}
throw IOException(str(format("premature end of line %d") %line_num_));
}
return true;
}
bool PDBReader::ParseAtomIdent(const StringRef& line, int line_num,
String& chain_name, StringRef& res_name,
mol::ResNum& resnum, StringRef& atom_name,
char& alt_loc, const StringRef& record_type)
{
if (!this->EnsureLineLength(line, 27)) {
return false;
}
atom_name=line.substr(12, 4).trim();
if (profile_.calpha_only) {
if (atom_name!=StringRef("CA", 2)) {
return false;
}
}
if (charmm_style_) {
if (line.size()>73) {
size_t width=std::min(line.size()-72, size_t(4));
chain_name=line.substr(72, width).trim().str();
}
} else {
chain_name=String(1, line[21]);
if (restrict_chains_.size()>0 &&
restrict_chains_.find(chain_name)==String::npos) {
return false;
}
}
alt_loc=line[16];
res_name=line.substr(17, charmm_style_ ? 4 : 3).trim();
std::pair<bool, int> res_num=line.substr(22, 4).ltrim().to_int();;
if (!res_num.first) {
if (profile_.fault_tolerant) {
return false;
}
throw IOException(str(format("invalid res number on line %d") % line_num));
}
char ins_c=line[26];
resnum=to_res_num(res_num.second, ins_c);
return true;
}
void PDBReader::ParseAnisou(const StringRef& line, int line_num,
mol::EntityHandle& ent)
{
if (!this->EnsureLineLength(line, 77)) {
return;
}
String chain_name;
char alt_loc=0;
StringRef res_name, atom_name;
mol::ResNum res_num(0);
if (!this->ParseAtomIdent(line, line_num, chain_name, res_name, res_num,
atom_name, alt_loc, StringRef("ANISOU", 6))) {
return;
}
double anisou[6]={0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
for (int i=0;i<6; ++i) {
std::pair<bool, int> result=line.substr(29+i*7, 6).ltrim().to_int();
if (!result.first) {
if (profile_.fault_tolerant) {
return;
}
throw IOException(str(format("invalid ANISOU record on line %d")%line_num));
}
anisou[i]=result.second;
}
String aname(atom_name.str());
if (!curr_residue_.IsValid()) {
if (profile_.fault_tolerant || profile_.calpha_only) {
return;
}
const char* fmt_str="invalid ANISOU record for inexistent atom on line %d";
throw IOException(str(format(fmt_str) % line_num));
}
mol::AtomHandle atom=curr_residue_.FindAtom(aname);
if (!atom.IsValid()) {
if (profile_.fault_tolerant ||
profile_.calpha_only ||
warned_name_mismatch_) {
return;
}
const char* fmt_str="invalid ANISOU record for inexistent atom on line %d";
throw IOException(str(format(fmt_str) % line_num));
}
//get properties which are already set and extend them by adding the ANISOU info
geom::Mat3 mat(anisou[0], anisou[3], anisou[4],
anisou[3], anisou[1], anisou[5],
anisou[4], anisou[5], anisou[2]);
mat/=10000;
atom.SetAnisou(mat);
}
void PDBReader::ParseAndAddAtom(const StringRef& line, int line_num,
mol::EntityHandle& ent,
const StringRef& record_type)
{
if (!this->EnsureLineLength(line, 54)) {
return;
}
mol::XCSEditor editor=ent.EditXCS(mol::BUFFERED_EDIT);
char alt_loc=0;
String chain_name;
StringRef res_name, atom_name;
mol::ResNum res_num(0);
if (!this->ParseAtomIdent(line, line_num, chain_name, res_name, res_num,
atom_name, alt_loc, record_type)) {
return;
}
std::pair<bool, Real> charge, radius;
std::pair<bool, Real> occ, temp;
geom::Vec3 apos;
for (int i=0;i<3; ++i) {
std::pair<bool, float> result=line.substr(30+i*8, 8).ltrim().to_float();
if (!result.first) {
if (profile_.fault_tolerant) {
return;
}
throw IOException(str(format("invalid coordinate on line %d")%line_num));
}
apos[i]=result.second;
}
// b-factors and occ are replaced by radius and charge if PQR file format
if(is_pqr_) {
occ=std::make_pair(true, Real(1.0));
temp=std::make_pair(true, Real(0.0));
if (line.length()>=60) {
charge=line.substr(55,7).ltrim().to_float();
}
if (line.length()>=68) {
radius=line.substr(63,6).ltrim().to_float();
}
} else {
if (line.length()>=60) {
occ=line.substr(54,6).ltrim().to_float();
}
if (line.length()>=66) {
temp=line.substr(60, 6).ltrim().to_float();
}
}
LOG_TRACE( "line: [" << line << "]" );
String s_ele;
// determine element from element column (77-78, right justified) if
// present otherwise set to empty String. It is up to the builder to
// determine the correct element in that case.
if (line.length()>=78) { // element column present
if(line[76]==' ' && line[77]==' ') { // both characters are empty
s_ele="";
} else if(line[76]!=' ' || line[77]!=' ') { // at least one character not
// empty
if(line[76]==' ' && line[77]!=' ') { // single character element,
// right justified
s_ele=line.substr(77,1).str();
} else if(line[76]!=' ' && line[77]==' ') {// single character element,
// left justified
s_ele=line.substr(76,1).str();
} else { // Real character element
s_ele=line.substr(76,2).str();
}
}
}
String aname(atom_name.str());
// some postprocessing
LOG_TRACE( "s_chain: [" << chain_name << "]" );
// determine chain and residue update
bool update_chain=false;
bool update_residue=false;
if(!curr_chain_) {
update_chain=true;
update_residue=true;
} else if(curr_chain_.GetName()!=chain_name) {
update_chain=true;
update_residue=true;
}
if(!curr_residue_) {
update_residue=true;
} else if(curr_residue_.GetNumber()!=res_num) {
update_residue=true;
}
if(update_chain) {
curr_chain_=ent.FindChain(chain_name);
if(!curr_chain_.IsValid()) {
LOG_DEBUG("new chain " << chain_name);
curr_chain_=editor.InsertChain(chain_name);
++chain_count_;
}
assert(curr_chain_.IsValid());
}
if(update_residue) {
curr_residue_=mol::ResidueHandle();
if (profile_.join_spread_atom_records) {
curr_residue_=curr_chain_.FindResidue(res_num);
}
if (!curr_residue_.IsValid()) {
LOG_DEBUG("new residue " << res_name << " " << res_num);
curr_residue_=editor.AppendResidue(curr_chain_, res_name.str(), res_num);
warned_name_mismatch_=false;
++residue_count_;
}
assert(curr_residue_.IsValid());
}
// finally add atom
LOG_DEBUG("adding atom " << aname << " (" << s_ele << " '" << alt_loc << "'" << ") @" << apos);
mol::AtomHandle ah;
if (curr_residue_.GetName()!=res_name.str()) {
if (!profile_.fault_tolerant && alt_loc==' ') {
std::stringstream ss;
ss << "error on line " << line_num << ": "
<< "residue with number " << res_num << " has more than one name.";
throw IOException(ss.str());
}
if(!profile_.quack_mode) {
if (!warned_name_mismatch_) {
if (alt_loc==' ') {
LOG_WARNING("Residue with number " << res_num << " has more than one name. "
"Ignoring atoms for everything but the first");
} else {
LOG_WARNING("Residue with number " << res_num
<< " contains a microheterogeneity. Everything but atoms for "
<< "the residue '" << curr_residue_.GetName()
<< "' will be ignored");
}
}
warned_name_mismatch_=true;
return;
}
}
Real b=temp.first ? temp.second : 0.0;
Real o=occ.first ? occ.second : 1.0;
if (!profile_.quack_mode && alt_loc!=' ') {
// Check if there is already a atom with the same name.
mol::AtomHandle me=curr_residue_.FindAtom(aname);
if (me.IsValid()) {
try {
editor.AddAltAtomPos(String(1, alt_loc), me, apos, o, b);
} catch (Error) {
LOG_INFO("Ignoring atom alt location since there is already an atom "
"with name " << aname << ", but without an alt loc");
return;
}
return;
} else {
ah=editor.InsertAltAtom(curr_residue_, aname,
String(1, alt_loc), apos, s_ele, o, b);
++atom_count_;
}
} else {
mol::AtomHandle atom=curr_residue_.FindAtom(aname);
if (atom.IsValid() && !profile_.quack_mode) {
if (profile_.fault_tolerant) {
LOG_WARNING("duplicate atom '" << aname << "' in residue "
<< curr_residue_);
return;
}
throw IOException("duplicate atom '"+aname+"' in residue "+
curr_residue_.GetQualifiedName());
}
ah=editor.InsertAtom(curr_residue_, aname, apos, s_ele);
++atom_count_;
}
if(is_pqr_) {
if (radius.first) {
ah.SetRadius(radius.second);
}
}
ah.SetBFactor(b);
ah.SetOccupancy(o);
if (charge.first) {
ah.SetCharge(charge.second);
}
ah.SetHetAtom(record_type[0]=='H');
}
void PDBReader::ParseHelixEntry(const StringRef& line)
{
if (!this->EnsureLineLength(line, 38)) {
return;
}
std::pair<bool, int> start_num=line.substr(21, 4).ltrim().to_int();
std::pair<bool, int> end_num=line.substr(33, 4).ltrim().to_int();
if (!start_num.first || !end_num.first) {
if (profile_.fault_tolerant) {
return;
}
throw IOException(str(format("invalid helix entry on line %d") % line_num_));
}
LOG_DEBUG("making helix entry: " << start_num.second << ", "
<< line[25] << " " << end_num.second << " " << line[37]);
HSEntry hse = {to_res_num(start_num.second, line[25]),
to_res_num(end_num.second, line[37]),
line.substr(19,1).str()};
if (restrict_chains_.size()==0 ||
restrict_chains_.find(hse.chain)!=String::npos) {
helix_list_.push_back(hse);
}
}
void PDBReader::ParseStrandEntry(const StringRef& line)
{
if (!this->EnsureLineLength(line, 38)) {
return;
}
std::pair<bool, int> start_num=line.substr(22, 4).ltrim().to_int();
std::pair<bool, int> end_num=line.substr(33, 4).ltrim().to_int();
if (!start_num.first || !end_num.first) {
if (profile_.fault_tolerant) {
return;
}
throw IOException(str(format("invalid strand entry on line %d")%line_num_));
}
LOG_DEBUG("making strand entry: " << start_num.second << ", " << line[26]
<< " " << end_num.second << " " << line[37] << " chain=" << line[21]);
HSEntry hse = {to_res_num(start_num.second, line[26]),
to_res_num(end_num.second, line[37]),
line.substr(21,1).str()};
if (restrict_chains_.size()==0 ||
restrict_chains_.find(hse.chain)!=String::npos) {
strand_list_.push_back(hse);
}
}
}}