From 425e394de288477592eea0b01fc2b86d842f60de Mon Sep 17 00:00:00 2001 From: Xavier Robin <xavier.robin@unibas.ch> Date: Fri, 17 Mar 2023 17:20:25 +0100 Subject: [PATCH] feat: SCHWED-5481 read gzipped SDF --- modules/io/doc/structure_formats.rst | 2 +- modules/io/src/mol/entity_io_sdf_handler.cc | 2 +- modules/io/src/mol/sdf_reader.cc | 10 ++++++++-- modules/io/src/mol/sdf_reader.hh | 2 ++ modules/io/tests/CMakeLists.txt | 1 + .../io/tests/testfiles/sdf/6d5w_rank1_crlf.sdf.gz | Bin 0 -> 505 bytes 6 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 modules/io/tests/testfiles/sdf/6d5w_rank1_crlf.sdf.gz diff --git a/modules/io/doc/structure_formats.rst b/modules/io/doc/structure_formats.rst index a80133842..42aca8826 100644 --- a/modules/io/doc/structure_formats.rst +++ b/modules/io/doc/structure_formats.rst @@ -54,5 +54,5 @@ SDF - Structured Data File Chemical-data file format. *Recognized File Extensions* - .sdf + .sdf, .sdf.gz diff --git a/modules/io/src/mol/entity_io_sdf_handler.cc b/modules/io/src/mol/entity_io_sdf_handler.cc index a9dced835..a843d986c 100644 --- a/modules/io/src/mol/entity_io_sdf_handler.cc +++ b/modules/io/src/mol/entity_io_sdf_handler.cc @@ -69,7 +69,7 @@ bool sdf_handler_is_responsible_for(const boost::filesystem::path& loc, if(type=="auto") { String match_suf_string=loc.string(); std::transform(match_suf_string.begin(),match_suf_string.end(),match_suf_string.begin(),tolower); - if(detail::FilenameEndsWith(match_suf_string,".sdf")) { + if(detail::FilenameEndsWith(match_suf_string,".sdf") || detail::FilenameEndsWith(match_suf_string,".sdf.gz")) { return true; } diff --git a/modules/io/src/mol/sdf_reader.cc b/modules/io/src/mol/sdf_reader.cc index 064c14f29..7f3a7f560 100644 --- a/modules/io/src/mol/sdf_reader.cc +++ b/modules/io/src/mol/sdf_reader.cc @@ -21,7 +21,9 @@ */ #include <boost/algorithm/string.hpp> +#include <boost/filesystem/convenience.hpp> #include <boost/format.hpp> +#include <boost/iostreams/filter/gzip.hpp> #include <boost/lexical_cast.hpp> #include <ost/mol/bond_handle.hh> #include <ost/conop/conop.hh> @@ -58,7 +60,7 @@ void SDFReader::Import(mol::EntityHandle& ent) { String line; mol::XCSEditor editor=ent.EditXCS(mol::BUFFERED_EDIT); - while (std::getline(instream_,line)) { + while (std::getline(in_,line)) { ++line_num; // std::getline removes EOL character but may leave a DOS CR (\r) in Unix @@ -87,7 +89,7 @@ void SDFReader::Import(mol::EntityHandle& ent) throw IOException(str(format(msg) % line_num)); } String data_value=""; - while(std::getline(instream_,line) && !boost::iequals(line, "")) { + while(std::getline(in_,line) && !boost::iequals(line, "")) { data_value.append(line); } curr_chain_.SetStringProp(data_header, data_value); @@ -103,6 +105,10 @@ void SDFReader::Import(mol::EntityHandle& ent) void SDFReader::ClearState(const boost::filesystem::path& loc) { + if (boost::iequals(".gz", boost::filesystem::extension(loc))) { + in_.push(boost::iostreams::gzip_decompressor()); + } + in_.push(instream_); if(!infile_) throw IOException("could not open "+loc.string()); curr_chain_=mol::ChainHandle(); curr_residue_=mol::ResidueHandle(); diff --git a/modules/io/src/mol/sdf_reader.hh b/modules/io/src/mol/sdf_reader.hh index e7a478b7a..04d05a2d6 100644 --- a/modules/io/src/mol/sdf_reader.hh +++ b/modules/io/src/mol/sdf_reader.hh @@ -22,6 +22,7 @@ #ifndef OST_IO_SDF_READER_HH #define OST_IO_SDF_READER_HH +#include <boost/iostreams/filtering_stream.hpp> #include <boost/filesystem/fstream.hpp> #include <ost/mol/chain_handle.hh> #include <ost/mol/residue_handle.hh> @@ -61,6 +62,7 @@ private: int line_num; boost::filesystem::ifstream infile_; std::istream& instream_; + boost::iostreams::filtering_stream<boost::iostreams::input> in_; }; }} diff --git a/modules/io/tests/CMakeLists.txt b/modules/io/tests/CMakeLists.txt index 0af1dfd5b..fbfef7413 100644 --- a/modules/io/tests/CMakeLists.txt +++ b/modules/io/tests/CMakeLists.txt @@ -2,6 +2,7 @@ set(OST_IO_UNIT_TESTS test_io_pdb.py test_io_mmcif.py test_io_omf.py + test_io_sdf.py test_clustal.cc test_io_pdb.cc test_io_crd.cc diff --git a/modules/io/tests/testfiles/sdf/6d5w_rank1_crlf.sdf.gz b/modules/io/tests/testfiles/sdf/6d5w_rank1_crlf.sdf.gz new file mode 100644 index 0000000000000000000000000000000000000000..658c3b9f089ef67229a54e18c766302083b22b16 GIT binary patch literal 505 zcmV<V0S5jbiwFp2Bokx+12$wecVBX0Zfh}LV{&X}E^}mN0F_lct{X87><#!1DpC{t zQ>@=hnjo)8kvpW%{Sj&<TMm#Q&|+Z>L2Eo5k@Wr6#`FFC>(5{9`N;eIcE2^I#@ZVG z_07!ogAwsQD1<=RY#@O^&O(&n6w$u4e{sCN51Qyn12I@yB^o2Js<yWS8it_2N!XEj zsbQoe8UYIH10i_Fhni41Rl~VxgwY2Lff=0@CBU4+Voo5UQ4}+fRiP&|3fi3>G?J61 zQ@QkUMAIqyq>=2G8U@mRW^o<C&`Fc&GLV(+<cBnY?$csYy)1T6MmwUh(!es+1PV<W zSK%X?I+D3)XzHq>oavN?c#c4aG?Qtu<YD`HO_JNBk*-%!)!Alwbm385%-7)nqx97` zUNk3a0%{6v8Y{s%X^{PF7<^3gy5?F%eU4@rpLt`TqB)BS_BER1bmomg)KE=}W38<! zs{5-q^k15>tgibhMB>XEQtU@X)h?E61J~v$Uvcdlq?4N}89X(QsGwY*S5#1UuAuE) zLEpK8aYa6&f_>)-=02B=@R=(}^vo5ct)(DsEd^<7sm5B-_`TX8qthy)6~+E$H0nE- vkH}{YytNeMt%d*arOve%PI;|wa6XlNYwhz#{i}Ye=YIPQ>#-PNb_W0eRoVB( literal 0 HcmV?d00001 -- GitLab