From 425e394de288477592eea0b01fc2b86d842f60de Mon Sep 17 00:00:00 2001 From: Xavier Robin <xavier.robin@unibas.ch> Date: Fri, 17 Mar 2023 17:20:25 +0100 Subject: [PATCH] feat: SCHWED-5481 read gzipped SDF --- modules/io/doc/structure_formats.rst | 2 +- modules/io/src/mol/entity_io_sdf_handler.cc | 2 +- modules/io/src/mol/sdf_reader.cc | 10 ++++++++-- modules/io/src/mol/sdf_reader.hh | 2 ++ modules/io/tests/CMakeLists.txt | 1 + .../io/tests/testfiles/sdf/6d5w_rank1_crlf.sdf.gz | Bin 0 -> 505 bytes 6 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 modules/io/tests/testfiles/sdf/6d5w_rank1_crlf.sdf.gz diff --git a/modules/io/doc/structure_formats.rst b/modules/io/doc/structure_formats.rst index a80133842..42aca8826 100644 --- a/modules/io/doc/structure_formats.rst +++ b/modules/io/doc/structure_formats.rst @@ -54,5 +54,5 @@ SDF - Structured Data File Chemical-data file format. *Recognized File Extensions* - .sdf + .sdf, .sdf.gz diff --git a/modules/io/src/mol/entity_io_sdf_handler.cc b/modules/io/src/mol/entity_io_sdf_handler.cc index a9dced835..a843d986c 100644 --- a/modules/io/src/mol/entity_io_sdf_handler.cc +++ b/modules/io/src/mol/entity_io_sdf_handler.cc @@ -69,7 +69,7 @@ bool sdf_handler_is_responsible_for(const boost::filesystem::path& loc, if(type=="auto") { String match_suf_string=loc.string(); std::transform(match_suf_string.begin(),match_suf_string.end(),match_suf_string.begin(),tolower); - if(detail::FilenameEndsWith(match_suf_string,".sdf")) { + if(detail::FilenameEndsWith(match_suf_string,".sdf") || detail::FilenameEndsWith(match_suf_string,".sdf.gz")) { return true; } diff --git a/modules/io/src/mol/sdf_reader.cc b/modules/io/src/mol/sdf_reader.cc index 064c14f29..7f3a7f560 100644 --- a/modules/io/src/mol/sdf_reader.cc +++ b/modules/io/src/mol/sdf_reader.cc @@ -21,7 +21,9 @@ */ #include <boost/algorithm/string.hpp> +#include <boost/filesystem/convenience.hpp> #include <boost/format.hpp> +#include <boost/iostreams/filter/gzip.hpp> #include <boost/lexical_cast.hpp> #include <ost/mol/bond_handle.hh> #include <ost/conop/conop.hh> @@ -58,7 +60,7 @@ void SDFReader::Import(mol::EntityHandle& ent) { String line; mol::XCSEditor editor=ent.EditXCS(mol::BUFFERED_EDIT); - while (std::getline(instream_,line)) { + while (std::getline(in_,line)) { ++line_num; // std::getline removes EOL character but may leave a DOS CR (\r) in Unix @@ -87,7 +89,7 @@ void SDFReader::Import(mol::EntityHandle& ent) throw IOException(str(format(msg) % line_num)); } String data_value=""; - while(std::getline(instream_,line) && !boost::iequals(line, "")) { + while(std::getline(in_,line) && !boost::iequals(line, "")) { data_value.append(line); } curr_chain_.SetStringProp(data_header, data_value); @@ -103,6 +105,10 @@ void SDFReader::Import(mol::EntityHandle& ent) void SDFReader::ClearState(const boost::filesystem::path& loc) { + if (boost::iequals(".gz", boost::filesystem::extension(loc))) { + in_.push(boost::iostreams::gzip_decompressor()); + } + in_.push(instream_); if(!infile_) throw IOException("could not open "+loc.string()); curr_chain_=mol::ChainHandle(); curr_residue_=mol::ResidueHandle(); diff --git a/modules/io/src/mol/sdf_reader.hh b/modules/io/src/mol/sdf_reader.hh index e7a478b7a..04d05a2d6 100644 --- a/modules/io/src/mol/sdf_reader.hh +++ b/modules/io/src/mol/sdf_reader.hh @@ -22,6 +22,7 @@ #ifndef OST_IO_SDF_READER_HH #define OST_IO_SDF_READER_HH +#include <boost/iostreams/filtering_stream.hpp> #include <boost/filesystem/fstream.hpp> #include <ost/mol/chain_handle.hh> #include <ost/mol/residue_handle.hh> @@ -61,6 +62,7 @@ private: int line_num; boost::filesystem::ifstream infile_; std::istream& instream_; + boost::iostreams::filtering_stream<boost::iostreams::input> in_; }; }} diff --git a/modules/io/tests/CMakeLists.txt b/modules/io/tests/CMakeLists.txt index 0af1dfd5b..fbfef7413 100644 --- a/modules/io/tests/CMakeLists.txt +++ b/modules/io/tests/CMakeLists.txt @@ -2,6 +2,7 @@ set(OST_IO_UNIT_TESTS test_io_pdb.py test_io_mmcif.py test_io_omf.py + test_io_sdf.py test_clustal.cc test_io_pdb.cc test_io_crd.cc diff --git a/modules/io/tests/testfiles/sdf/6d5w_rank1_crlf.sdf.gz b/modules/io/tests/testfiles/sdf/6d5w_rank1_crlf.sdf.gz new file mode 100644 index 0000000000000000000000000000000000000000..658c3b9f089ef67229a54e18c766302083b22b16 GIT binary patch literal 505 zcmb2|=HN(E5l&%XHcK%rk1tBh%QlQpF3L&MD^5vcm>O!mPTG*?4et*=tsvn)!E5e& z&r(?LG0}R-`M-i$s?mHC6fPts@;GMO$V~k5_u8?~fB(KdAOH5}iQj+A_M0v{c1!Bt z*RyB7H4A>U(`a$Hk)zP)aL#GU1CfjSw$vBy*~dTYg(tINM3kzO@`hF0wlhn$EjZwL zV`AmX#3jm7j9jmo6)TRn&kC>%J-joc<sv(8qvTvs)dO=5C;2HX3KTJ(5V}y^gzMH^ zIg?3~mj&<qQs{D7^VgDzZ>CGJUaCtgls$OCbMi~02~*!bX*X$ncR49&uTEKjoAsiN zmkt<RW#RIb3V*0P%WBfjjEq;S7R`A%rLCgRq1|M1=Ef(9zbkzw-Ci<rU3lQtn`xCL z4+CS*Tz|;0_{&xEI1}|OrYx>pDeZ$+nG^ozNYwaz-t{cXwYD!!Vt!@vf<>0yT;EJ* zJt=wC>v$!=w>x?5TCG*TSDSx{pS5w#y5E{Ek4{Smy>WNFsy8j0`T8@>_`<i+OD50O zlC&}HU$G!&et1Yg`ML#fV;t}AY$|lMThvthoa>q0^xh9MwLQL^5nZx&sY0$UYi9P! zV^J4>?7gM9__Ed_(WC#;OkP<}?>~{wyJxG&leMk?KP)}BRot&~ZGM5>)Y|N~pWXki Tu3PoI?l<r2jS`7v><kP5RoVB( literal 0 HcmV?d00001 -- GitLab