Something went wrong on our end
-
Marco Biasini authoredMarco Biasini authored
dcd_io.cc 13.04 KiB
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2010 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
/*
Authors: Ansgar Philippsen, Marco Biasini
*/
#include <fstream>
#include <algorithm>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/filesystem/convenience.hpp>
#include <ost/stdint.hh>
#include <ost/log.hh>
#include <ost/profile.hh>
#include <ost/conop/conop.hh>
#include <ost/mol/xcs_editor.hh>
#include <ost/io/io_exception.hh>
#include <ost/io/swap_util.hh>
#include "load_entity.hh"
#include "dcd_io.hh"
#include "pdb_reader.hh"
#include "pdb_writer.hh"
#include "entity_io_crd_handler.hh"
namespace ost { namespace io {
/*
icntrl[1]: number of coordinate sets in file
icntrl[2]: number of previous dynamic steps
icntrl[3]: frequency for saving coordinates
icntlr[4]: number of steps for creation run
*/
namespace {
struct DCDHeader {
char hdrr[4];
int icntrl[20];
int ntitle;
char title[1024];
int num, istep, freq,nstep;
int t_atom_count,f_atom_count, atom_count;
};
bool less_index(const mol::AtomHandle& a1, const mol::AtomHandle& a2)
{
return a1.GetIndex()<a2.GetIndex();
}
bool read_dcd_header(std::istream& istream, DCDHeader& header, bool& swap_flag,
bool& skip_flag, bool& gap_flag)
{
if (!istream) {
return false;
}
char dummy[4];
gap_flag=true;
swap_flag=false;
skip_flag=false;
if(gap_flag) istream.read(dummy,sizeof(dummy));
istream.read(header.hdrr,sizeof(char)*4);
istream.read(reinterpret_cast<char*>(header.icntrl),sizeof(int)*20);
if(header.icntrl[1]<0 || header.icntrl[1]>1e8) {
// nonsense atom count, try swapping
swap_int(header.icntrl,20);
if(header.icntrl[1]<0 || header.icntrl[1]>1e8) {
throw(IOException("LoadCHARMMTraj: nonsense atom count in header"));
} else {
LOG_VERBOSE("LoadCHARMMTraj: byte-swapping");
swap_flag=true;
}
}
if(header.icntrl[19]!=0) { // CHARMM format
skip_flag=(header.icntrl[10]!=0);
if(skip_flag) {
LOG_VERBOSE("LoadCHARMMTraj: using CHARMM format with per-frame header");
} else {
LOG_VERBOSE("LoadCHARMMTraj: using CHARMM format");
}
} else {
// XPLOR format
LOG_VERBOSE("LoadCHARMMTraj: using XPLOR format");
}
if(gap_flag) istream.read(dummy,sizeof(dummy));
if (!istream) {
return false;
}
istream.read(reinterpret_cast<char*>(&header.ntitle),sizeof(int));
if (!istream) {
return false;
}
if(swap_flag) swap_int(&header.ntitle,1);
if(gap_flag) istream.read(dummy,sizeof(dummy));
istream.read(header.title,sizeof(char)*header.ntitle);
header.title[header.ntitle]='\0';
if(gap_flag) istream.read(dummy,sizeof(dummy));
istream.read(reinterpret_cast<char*>(&header.t_atom_count),sizeof(int));
if(swap_flag) swap_int(&header.t_atom_count,1);
if(gap_flag) istream.read(dummy,sizeof(dummy));
header.num=header.icntrl[0];
header.istep=header.icntrl[1];
header.freq=header.icntrl[2];
header.nstep=header.icntrl[3];
header.f_atom_count=header.icntrl[8];
header.atom_count=header.t_atom_count-header.f_atom_count;
return true;
}
size_t calc_frame_size(bool skip_flag, bool gap_flag, size_t num_atoms)
{
size_t frame_size=0;
if (skip_flag) {
frame_size+=14*sizeof(int);
}
if (gap_flag) {
frame_size+=6*sizeof(int);
}
frame_size+=3*sizeof(float)*num_atoms;
return frame_size;
}
bool read_frame(std::istream& istream, const DCDHeader& header,
size_t frame_size, bool skip_flag, bool gap_flag,
bool swap_flag, std::vector<float>& xlist,
std::vector<geom::Vec3>& frame)
{
char dummy[4];
if(skip_flag) istream.seekg(14*4,std::ios_base::cur);
// read each frame
if(!istream) {
/* premature EOF */
LOG_ERROR("LoadCHARMMTraj: premature end of file, frames read");
return false;
}
// x coord
if(gap_flag) istream.read(dummy,sizeof(dummy));
istream.read(reinterpret_cast<char*>(&xlist[0]),sizeof(float)*xlist.size());
if(swap_flag) swap_float(&xlist[0],xlist.size());
if(gap_flag) istream.read(dummy,sizeof(dummy));
for(uint j=0;j<frame.size();++j) {
frame[j].x=xlist[j];
}
// y coord
if(gap_flag) istream.read(dummy,sizeof(dummy));
istream.read(reinterpret_cast<char*>(&xlist[0]),sizeof(float)*xlist.size());
if(swap_flag) swap_float(&xlist[0],xlist.size());
if(gap_flag) istream.read(dummy,sizeof(dummy));
for(uint j=0;j<frame.size();++j) {
frame[j].y=xlist[j];
}
// z coord
if(gap_flag) istream.read(dummy,sizeof(dummy));
istream.read(reinterpret_cast<char*>(&xlist[0]),sizeof(float)*xlist.size());
if(swap_flag) swap_float(&xlist[0],xlist.size());
if(gap_flag) istream.read(dummy,sizeof(dummy));
for(uint j=0;j<frame.size();++j) {
frame[j].z=xlist[j];
}
return true;
}
mol::CoordGroupHandle load_dcd(const mol::AtomHandleList& alist2,
const String& trj_fn,
unsigned int stride)
{
Profile profile_load("LoadCHARMMTraj");
mol::AtomHandleList alist(alist2);
std::sort(alist.begin(),alist.end(),less_index);
std::ifstream istream(trj_fn.c_str(), std::ios::binary);
DCDHeader header;
bool swap_flag=false, skip_flag=false, gap_flag=false;
read_dcd_header(istream, header, swap_flag, skip_flag, gap_flag);
LOG_DEBUG("LoadCHARMMTraj: " << header.num << " trajectories with "
<< header.atom_count << " atoms (" << header.f_atom_count
<< " fixed) each");
if(alist.size() != static_cast<size_t>(header.t_atom_count)) {
LOG_ERROR("LoadCHARMMTraj: atom count missmatch: " << alist.size()
<< " in coordinate file, " << header.t_atom_count
<< " in each traj frame");
throw(IOException("invalid trajectory"));
}
mol::CoordGroupHandle cg=CreateCoordGroup(alist);
std::vector<geom::Vec3> clist(header.t_atom_count);
std::vector<float> xlist(header.t_atom_count);
size_t frame_size=calc_frame_size(skip_flag, gap_flag, xlist.size());
int i=0;
for(;i<header.num;i+=stride) {
if (!read_frame(istream, header, frame_size, skip_flag, gap_flag,
swap_flag, xlist, clist)) {
break;
}
cg.AddFrame(clist);
// skip frames (defined by stride)
if(stride>1) istream.seekg(frame_size*(stride-1),std::ios_base::cur);
}
istream.get();
if(!istream.eof()) {
LOG_VERBOSE("LoadCHARMMTraj: unexpected trailing file data, bytes read: "
<< istream.tellg());
}
LOG_VERBOSE("Loaded " << cg.GetFrameCount() << " frames with " << cg.GetAtomCount() << " atoms each");
return cg;
}
class DCDCoordSource : public mol::CoordSource {
public:
DCDCoordSource(const mol::AtomHandleList& atoms, const String& filename,
uint stride):
mol::CoordSource(atoms), filename_(filename),
stream_(filename.c_str(), std::ios::binary), loaded_(false), stride_(stride)
{
frame_count_=0;
this->SetMutable(false);
frame_=mol::CoordFramePtr(new mol::CoordFrame(atoms.size()));
}
virtual uint GetFrameCount()
{
if (!frame_count_)
const_cast<DCDCoordSource*>(this)->FetchFrame(0);
return frame_count_;
}
virtual mol::CoordFramePtr GetFrame(uint frame_id) const {
const_cast<DCDCoordSource*>(this)->FetchFrame(frame_id);
return frame_;
}
virtual void AddFrame(const std::vector<geom::Vec3>& coords) {}
virtual void InsertFrame(int pos, const std::vector<geom::Vec3>& coords) {}
private:
void FetchFrame(uint frame);
String filename_;
DCDHeader header_;
bool skip_flag_;
bool swap_flag_;
bool gap_flag_;
std::ifstream stream_;
bool loaded_;
uint frame_count_;
uint curr_frame_;
uint stride_;
size_t frame_start_;
mol::CoordFramePtr frame_;
};
void DCDCoordSource::FetchFrame(uint frame)
{
if (!loaded_) {
read_dcd_header(stream_, header_, swap_flag_, skip_flag_, gap_flag_);
frame_start_=stream_.tellg();
loaded_=true;
frame_count_=header_.num/stride_;
}
size_t frame_size=calc_frame_size(skip_flag_, gap_flag_,
header_.t_atom_count);
size_t pos=frame_start_+frame_size*frame*stride_;
stream_.seekg(pos,std::ios_base::beg);
std::vector<float> xlist(header_.t_atom_count);
if (!read_frame(stream_, header_, frame_size, skip_flag_, gap_flag_,
swap_flag_, xlist, *frame_.get())) {
}
}
typedef boost::shared_ptr<DCDCoordSource> DCDCoordSourcePtr;
} // anon ns
mol::CoordGroupHandle LoadCHARMMTraj(const mol::EntityHandle& ent,
const String& trj_fn,
unsigned int stride, bool lazy_load)
{
mol::AtomHandleList alist(ent.GetAtomList());
std::sort(alist.begin(),alist.end(),less_index);
if (lazy_load) {
LOG_INFO("Importing CHARMM trajectory with lazy_load=true");
DCDCoordSource* source=new DCDCoordSource(alist, trj_fn, stride);
return mol::CoordGroupHandle(DCDCoordSourcePtr(source));
}
LOG_INFO("Importing CHARMM trajectory with lazy_load=false");
return load_dcd(alist, trj_fn, stride);
}
namespace {
void write_dcd_hdr(std::ofstream& out,
const mol::CoordGroupHandle& coord_group,
unsigned int stepsize)
{
int32_t magic_number=84;
char crd[]={'C', 'O', 'R', 'D'};
out.write(reinterpret_cast<char*>(&magic_number), 4);
out.write(crd, 4);
int32_t num_frames=coord_group.GetFrameCount()/stepsize;
out.write(reinterpret_cast<char*>(&num_frames), 4);
int32_t zero=0;
// write zero for istart
out.write(reinterpret_cast<char*>(&zero), 4);
int32_t one=1;
// number of timesteps between dcd saves.
out.write(reinterpret_cast<char*>(&one), 4);
// write spacer of 5 blank integers
for (int i=0; i<5; ++i) {
out.write(reinterpret_cast<char*>(&zero), 4);
}
// write number of fixed atoms
out.write(reinterpret_cast<char*>(&zero), 4);
float delta=1.0;
out.write(reinterpret_cast<char*>(&delta), 4);
// write spacer of 10 blank integers
for (int i=0; i <10; ++i) {
out.write(reinterpret_cast<char*>(&zero), 4);
}
out.write(reinterpret_cast<char*>(&magic_number), 4);
// we don't write any titles for now. This means that the block has only to
// accomodate one int, harr, harr, harr.
int32_t title_block_size=4;
out.write(reinterpret_cast<char*>(&title_block_size), 4);
out.write(reinterpret_cast<char*>(&zero), 4);
int32_t four=4;
// again block size for titles?
out.write(reinterpret_cast<char*>(&four), 4);
// has to be 4
out.write(reinterpret_cast<char*>(&four), 4);
int32_t atom_count=coord_group.GetAtomCount();
out.write(reinterpret_cast<char*>(&atom_count), 4);
out.write(reinterpret_cast<char*>(&four), 4);
}
}
void SaveCHARMMTraj(const mol::CoordGroupHandle& coord_group,
const String& pdb_filename, const String& dcd_filename,
unsigned int stepsize)
{
if(stepsize==0) stepsize=1;
if(!pdb_filename.empty()) {
PDBWriter writer(pdb_filename, true);
writer.Write(coord_group.GetAtomList());
}
std::ofstream out(dcd_filename.c_str(), std::ios::binary);
write_dcd_hdr(out, coord_group,stepsize);
int frame_count=coord_group.GetFrameCount();
int atom_count=coord_group.GetAtomCount();
std::vector<float> x(atom_count);
std::vector<float> y(atom_count);
std::vector<float> z(atom_count);
int32_t out_n=atom_count*4;
for (int i=0; i<frame_count; i+=stepsize) {
mol::CoordFramePtr frame=coord_group.GetFrame(i);
int k=0;
for (mol::CoordFrame::iterator j=frame->begin(),
e=frame->end(); j!=e; ++j, ++k) {
//geom::Vec3 v=*j;
x[k]=float((*j)[0]);
y[k]=float((*j)[1]);
z[k]=float((*j)[2]);
}
out.write(reinterpret_cast<char*>(&out_n), 4);
out.write(reinterpret_cast<char*>(&x[0]), out_n);
out.write(reinterpret_cast<char*>(&out_n), 4);
out.write(reinterpret_cast<char*>(&out_n), 4);
out.write(reinterpret_cast<char*>(&y[0]), out_n);
out.write(reinterpret_cast<char*>(&out_n), 4);
out.write(reinterpret_cast<char*>(&out_n), 4);
out.write(reinterpret_cast<char*>(&z[0]), out_n);
out.write(reinterpret_cast<char*>(&out_n), 4);
}
}
}} // ns