// entity_io_mae_handler.cc
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2020 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
/*
Author: Ansgar Philippsen
*/
#include <iostream>
#include <sstream>
#include <iomanip>
#include <vector>
#include <string>
#include <ctype.h>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/filesystem/convenience.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/format.hpp>
#include <boost/regex.hpp>
#include <boost/shared_array.hpp>
#include <ost/log.hh>
#include <ost/conop/conop.hh>
#include <ost/mol/xcs_editor.hh>
#include <ost/profile.hh>
#include <ost/io/mol/io_profile.hh>
#include <ost/io/io_exception.hh>
#include <ost/io/swap_util.hh>
#include "entity_io_mae_handler.hh"
namespace ost { namespace io {
using boost::format;
// Opens the MAE file at loc for reading.
//
// When the extension of loc equals ".gz" (compared case-insensitively) a gzip
// decompressor is put in front of the file stream, so Import() always sees
// plain text.
//
// Throws IOException("could not open <path>") if the file stream is not in a
// good state.
MAEReader::MAEReader(const boost::filesystem::path& loc):
  curr_chain_(),
  curr_residue_(),
  chain_count_(0),
  residue_count_(0),
  atom_count_(0),
  infile_(loc),
  in_()
{
  const bool is_gzipped=boost::iequals(".gz", loc.extension().string());
  if (is_gzipped) {
    in_.push(boost::iostreams::gzip_decompressor());
  }
  // the file itself is always the last element of the filter chain
  in_.push(infile_);
  if (!infile_) {
    throw IOException("could not open "+loc.string());
  }
}
// Reads the complete input and adds every atom record found to ent.
//
// The reader is a line-based state machine:
//  - outside of a ct block only a line matching "f_m_ct {" is of interest;
//    it opens a ct block
//  - inside a ct block, "m_atom[...] {" opens an atom block and a line
//    containing "}" closes the ct block
//  - inside an atom block, the lines up to the first ":::" declare the
//    columns, one property name per line ('#' lines are comments). The
//    records between that ":::" and the next one are handed to
//    parse_and_add_atom(). A line containing "}" closes the atom block.
//
// Column ids start at 1 because column 0 of every record is the atom index.
// s_m_pdb_segment_name is only used as chain name if no s_m_chain_name column
// has been declared before it.
//
// Throws IOException("missing atom prop") if the atom name, one of the
// coordinates, the residue name or the residue number column has not been
// declared when the records start.
void MAEReader::Import(mol::EntityHandle& ent)
{
  Profile profile_import("MAEReader::Import");

  static const boost::regex r_ct_block("f_m_ct +\\{");
  static const boost::regex r_atom_block("m_atom\\[.*\\] +\\{");
  static const boost::regex r_delim(":::");
  static const boost::regex r_end(" *\\} *");

  String line;
  bool in_ct_block=false;
  bool in_atom_block=false;
  bool parsing_atoms=false;
  std::vector<std::string> prop_list;
  int i_atom_name=-1;
  int i_atom_xpos=-1;
  int i_atom_ypos=-1;
  int i_atom_zpos=-1;
  int i_res_name=-1;
  int i_res_num=-1;
  int i_chain_name=-1;

  mol::XCSEditor editor=ent.EditXCS(mol::BUFFERED_EDIT);

  // Writable, NUL terminated copy of the current record. It is resized to the
  // record at hand: a fixed size buffer combined with line.size() as length
  // lets the parser run past the end of the buffer on long lines.
  std::vector<char> record;

  while(std::getline(in_,line)) {
    if(!in_ct_block) {
      if(boost::regex_search(line,r_ct_block)) {
        LOG_TRACE( "entering ct block" );
        in_ct_block=true;
      }
      continue;
    }

    if(!in_atom_block) {
      if(boost::regex_search(line,r_atom_block)) {
        LOG_TRACE( "entering atom block" );
        in_atom_block=true;
      } else if(boost::regex_search(line,r_end)) {
        LOG_TRACE( "exiting ct block" );
        in_ct_block=false;
      }
      continue;
    }

    if(parsing_atoms) {
      if(boost::regex_search(line,r_delim)) {
        // second ":::" - forget the column layout of this block
        LOG_TRACE( "stopping atom parsing" );
        parsing_atoms=false;
        prop_list.clear();
        i_atom_name=-1;
        i_atom_xpos=-1;
        i_atom_ypos=-1;
        i_atom_zpos=-1;
        i_res_name=-1;
        i_res_num=-1;
        i_chain_name=-1;
      } else {
        // parsing atom line
        record.assign(line.begin(),line.end());
        record.push_back('\0');
        parse_and_add_atom(ent,editor,&record[0],line.size(),
                           i_atom_name, i_atom_xpos, i_atom_ypos, i_atom_zpos,
                           i_res_name, i_res_num, i_chain_name);
      }
      continue;
    }

    // in an atom block, but the records have not started yet
    if(boost::regex_search(line,r_delim)) {
      // the chain name column is optional, everything else is required
      if(i_atom_name==-1 ||
         i_atom_xpos==-1 ||
         i_atom_ypos==-1 ||
         i_atom_zpos==-1 ||
         i_res_name==-1 ||
         i_res_num==-1) {
        throw IOException("missing atom prop");
      }
      LOG_TRACE( "starting atom parsing" );
      parsing_atoms=true;
    } else if(boost::regex_search(line,r_end)) {
      LOG_TRACE( "exiting atom block" );
      in_atom_block=false;
    } else {
      // parsing property line
      const std::string prop=boost::trim_copy(line);
      // neither blank lines nor comments declare a column; counting them
      // would shift the ids of all following columns
      if(prop.empty() || prop[0]=='#') {
        continue;
      }
      const int pid=static_cast<int>(prop_list.size())+1;
      prop_list.push_back(prop);
      LOG_TRACE( "found property '" << prop_list.back() << "' id=" << pid );
      if(prop=="s_m_pdb_atom_name") i_atom_name=pid;
      else if(prop=="r_m_x_coord") i_atom_xpos=pid;
      else if(prop=="r_m_y_coord") i_atom_ypos=pid;
      else if(prop=="r_m_z_coord") i_atom_zpos=pid;
      else if(prop=="s_m_pdb_residue_name") i_res_name=pid;
      else if(prop=="i_m_residue_number") i_res_num=pid;
      else if(prop=="s_m_chain_name") i_chain_name=pid;
      else if(prop=="s_m_pdb_segment_name") {
        // only use this one if s_m_chain_name is not present
        if(i_chain_name<0) {
          i_chain_name=pid;
        }
      }
    }
  }

  LOG_INFO("imported " << chain_count_ << " chains, " << residue_count_
           << " residues, " << atom_count_ << " atoms");
}
namespace {

// isspace() must not be handed negative values, which is what plain char
// yields for bytes above 127 on platforms where char is signed.
inline bool mae_is_space(char c)
{
  return isspace(static_cast<unsigned char>(c))!=0;
}

// Splits one atom record into its column values.
//
// Columns are separated by whitespace. A value enclosed in double quotes may
// contain whitespace; leading and trailing whitespace inside the quotes is
// removed. Text glued to the front of an opening quote is dropped and a quote
// that is never closed yields no value.
//
// Scanning stops at line_len or at the first NUL, whichever comes first, so a
// NUL terminated buffer is never overrun even if line_len is too large. The
// last column is returned no matter whether whitespace follows it.
void mae_split_record(const char* line, size_t line_len,
                      std::vector<std::string>& columns)
{
  columns.clear();

  size_t len=0;
  while(len<line_len && line[len]!='\0') ++len;

  size_t pos=0;
  while(pos<len) {
    while(pos<len && mae_is_space(line[pos])) ++pos;
    if(pos>=len) break;

    const size_t start=pos;
    while(pos<len && !mae_is_space(line[pos]) && line[pos]!='"') ++pos;

    if(pos<len && line[pos]=='"') {
      size_t first=pos+1;
      size_t last=first;
      while(last<len && line[last]!='"') ++last;
      if(last>=len) break; // unterminated quote
      pos=last+1;
      while(first<last && mae_is_space(line[first])) ++first;
      while(last>first && mae_is_space(line[last-1])) --last;
      columns.push_back(std::string(line+first,line+last));
    } else {
      columns.push_back(std::string(line+start,line+pos));
    }
  }
}

// True if idx refers to a column that is present in the record. Undeclared
// columns carry the index -1.
inline bool mae_has_column(const std::vector<std::string>& columns, int idx)
{
  return idx>=0 && static_cast<size_t>(idx)<columns.size();
}

} // anon ns

// Parses one atom record and inserts the atom into ent.
//
// line/line_len: the record; it is not modified
// i_*:           column index of the respective property, -1 if the column
//                has not been declared. Column 0 is the atom index.
//
// Values that are not present keep their defaults: atom name "X", residue
// name "UNK", chain name "X", residue number 0 and a default constructed
// position. A record without any column is ignored.
//
// The element is taken from the first character of the atom name; a leading
// digit is interpreted as hydrogen.
//
// Chain and residue of the previous atom are cached in curr_chain_ and
// curr_residue_. A new chain is looked up (or created) when the chain name
// changes; a new residue when the chain or the residue number changes. New
// residues are inserted such that the residue numbers in the chain stay
// sorted.
void MAEReader::parse_and_add_atom(mol::EntityHandle ent,
                                   mol::XCSEditor& editor,
                                   char* line,
                                   size_t line_len,
                                   int i_atom_name,
                                   int i_atom_xpos,
                                   int i_atom_ypos,
                                   int i_atom_zpos,
                                   int i_res_name,
                                   int i_res_num,
                                   int i_chain_name)
{
  LOG_TRACE("[" << std::string(line) << "]");

  std::vector<std::string> columns;
  mae_split_record(line,line_len,columns);
  if(columns.empty()) {
    // blank line, there is no atom to add
    return;
  }

  std::string aname("X");
  std::string rname("UNK");
  std::string cname("X");
  geom::Vec3 apos;
  int irnum=0;

  if(mae_has_column(columns,i_atom_name)) aname=columns[i_atom_name];
  if(mae_has_column(columns,i_atom_xpos)) apos[0]=atof(columns[i_atom_xpos].c_str());
  if(mae_has_column(columns,i_atom_ypos)) apos[1]=atof(columns[i_atom_ypos].c_str());
  if(mae_has_column(columns,i_atom_zpos)) apos[2]=atof(columns[i_atom_zpos].c_str());
  if(mae_has_column(columns,i_res_name)) rname=columns[i_res_name];
  if(mae_has_column(columns,i_res_num)) irnum=atoi(columns[i_res_num].c_str());
  if(mae_has_column(columns,i_chain_name)) cname=columns[i_chain_name];

  std::string ele=aname.empty() ? "X" : aname.substr(0,1);
  if(isdigit(static_cast<unsigned char>(ele[0]))) ele="H";

  mol::ResidueKey rkey(rname);
  mol::ResNum rnum(irnum);

  // determine chain and residue update
  const bool update_chain=!curr_chain_ || curr_chain_.GetName()!=cname;
  const bool update_residue=update_chain || !curr_residue_ ||
                            curr_residue_.GetNumber()!=rnum;

  if(update_chain) {
    if (!(curr_chain_=ent.FindChain(cname))) {
      curr_chain_=editor.InsertChain(cname);
      LOG_TRACE("new chain " << curr_chain_);
      ++chain_count_;
    } else {
      LOG_TRACE("old chain " << curr_chain_);
    }
  }

  if(update_residue) {
    if (!(curr_residue_=curr_chain_.FindResidue(rnum))) {
      if(curr_chain_.GetResidueCount()>0) {
        // search backwards for the last residue with a smaller number; the
        // new residue goes right behind it
        int loc=curr_chain_.GetResidueCount()-1;
        for(;loc>=0;--loc) {
          if(curr_chain_.GetResidueByIndex(loc).GetNumber()<rnum) break;
        }
        if(loc<0) {
          curr_residue_=editor.InsertResidueBefore(curr_chain_,0,rnum,rkey);
        } else {
          curr_residue_=editor.InsertResidueAfter(curr_chain_,loc,rnum,rkey);
        }
        if(!curr_residue_) {
          // this should not happen...
          curr_residue_=editor.AppendResidue(curr_chain_, rkey, rnum);
        }
      } else {
        curr_residue_=editor.AppendResidue(curr_chain_, rkey, rnum);
      }
      assert(curr_residue_.IsValid());
      LOG_TRACE(" new residue " << curr_residue_);
      ++residue_count_;
    } else {
      LOG_TRACE(" old residue " << curr_residue_);
    }
  }

  // finally add atom
  LOG_TRACE(" atom " << aname << " (" << ele << ") @" << apos);
  ++atom_count_;
  editor.InsertAtom(curr_residue_, aname, apos, ele);
}
// Always true for MAE files.
// NOTE(review): presumably this tells the caller to run the IO profile's
// processor on the imported entity - confirm against the handler interface.
bool EntityIOMAEHandler::RequiresProcessor() const
{
  const bool needs_processor=true;
  return needs_processor;
}
// Imports the MAE file at loc into ent. The work is done by a MAEReader that
// only lives for the duration of this call; the exceptions of its constructor
// and of MAEReader::Import() are passed on unchanged.
void EntityIOMAEHandler::Import(mol::EntityHandle& ent,
                                const boost::filesystem::path& loc)
{
  MAEReader(loc).Import(ent);
}
// Export to MAE is not implemented. ProvidesExport() reports this by
// returning false; a caller that ends up here nevertheless gets an
// IOException - as for the stream based overload - rather than a silent
// no-op that leaves no file behind.
void EntityIOMAEHandler::Export(const mol::EntityView& ent,
                                const boost::filesystem::path& loc) const
{
  throw IOException("MAE format does not support export");
}
namespace {

// Decides whether the MAE handler takes care of loc.
//
// type "mae":  always
// type "auto": if the file name ends in ".mae" or ".mae.gz", ignoring case
// otherwise:   never
bool mae_handler_is_responsible_for(const boost::filesystem::path& loc,
                                    const String& type) {
  if(type=="mae") {
    return true;
  }
  if(type!="auto") {
    return false;
  }
  // to_lower_copy converts through std::locale; feeding plain char values to
  // the C tolower() is undefined for bytes above 127 in a file name
  const String match_suf_string=boost::to_lower_copy(loc.string());
  return detail::FilenameEndsWith(match_suf_string,".mae") ||
         detail::FilenameEndsWith(match_suf_string,".mae.gz");
}

}
// Import is offered for every location/type combination accepted by
// mae_handler_is_responsible_for().
bool EntityIOMAEHandler::ProvidesImport(const boost::filesystem::path& loc,
                                        const String& type)
{
  const bool responsible=mae_handler_is_responsible_for(loc, type);
  return responsible;
}
// Export is never offered by this handler, whatever location and type are
// given.
bool EntityIOMAEHandler::ProvidesExport(const boost::filesystem::path& loc,
                                        const String& type)
{
  const bool can_export=false;
  return can_export;
}
// Convenience function: loads the MAE file file_name into a newly created
// entity and, if the default IO profile has a processor, runs that processor
// on the entity before returning it.
//
// Exceptions thrown by the MAEReader constructor and by MAEReader::Import()
// are passed on unchanged.
mol::EntityHandle LoadMAE(const String& file_name)
{
  MAEReader reader(file_name);
  mol::EntityHandle entity=mol::CreateEntity();
  // Not used directly, but alive until the function returns.
  // NOTE(review): presumably held to keep the entity in buffered edit mode
  // during import and processing - confirm before removing.
  mol::XCSEditor pending_edits=entity.EditXCS(mol::BUFFERED_EDIT);
  reader.Import(entity);

  IOProfile& default_profile=IOProfileRegistry::Instance().GetDefault();
  if (!default_profile.processor) {
    return entity;
  }
  default_profile.processor->Process(entity);
  return entity;
}
// Stream based import is not available for MAE: always throws IOException.
void EntityIOMAEHandler::Import(mol::EntityHandle& ent,
                                std::istream& stream)
{
  const char* const reason="MAE format does not support import from stream";
  throw IOException(reason);
}
// Stream based export is not available for MAE: always throws IOException.
void EntityIOMAEHandler::Export(const mol::EntityView& ent,
                                std::ostream& stream) const
{
  const char* const reason="MAE format does not support export to stream";
  throw IOException(reason);
}
}} // ns