From 1645c2c9185d77c046043d2458d1dfca875d233d Mon Sep 17 00:00:00 2001 From: Xavier Robin <xavalias-github@xavier.robin.name> Date: Mon, 4 Sep 2023 15:39:59 +0200 Subject: [PATCH] fix: allow short count lines The RCSB ModelServer SDF files contain counts lines with only 33 characters. Although invalid, they can be read by RDKit (which only requires 6 characters). This commit emulates the behavior of RDKit. --- modules/io/src/mol/sdf_reader.cc | 18 ++++++++++++++---- modules/io/tests/test_io_sdf.cc | 12 ++++++++++++ .../io/tests/testfiles/sdf/1atg_C_ACT.sdf.gz | Bin 0 -> 427 bytes 3 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 modules/io/tests/testfiles/sdf/1atg_C_ACT.sdf.gz diff --git a/modules/io/src/mol/sdf_reader.cc b/modules/io/src/mol/sdf_reader.cc index 381f5cbe9..660a9dcb5 100644 --- a/modules/io/src/mol/sdf_reader.cc +++ b/modules/io/src/mol/sdf_reader.cc @@ -182,12 +182,22 @@ void SDFReader::ParseHeader(const String& line, int line_num, break; case 4: // counts line { - if (line.length() < 39) { - String msg="Bad counts line %d: Not correct number of characters on " - "the line: %i (should be at least 39)"; + String version_str; + if (line.length() < 6) { + String msg="Bad counts line %d: too short (%i characters, " + "should be at least 6 or 39)"; throw IOException(str(format(msg) % line_num % line.length())); } - String version_str=line.substr(34, 5); + else if (line.length() < 39) { + String msg="Bad counts line %d: too short (%i characters, " + "should be at least 39). 
" + "Proceeding assuming V2000 format."; + LOG_WARNING(str(format(msg) % line_num % line.length())); + version_str="V2000"; + } + else { + version_str=line.substr(34, 5); + } if (version_str == "V2000" || version_str == "V3000") { version_=version_str; } diff --git a/modules/io/tests/test_io_sdf.cc b/modules/io/tests/test_io_sdf.cc index df84041b0..68009e9c9 100644 --- a/modules/io/tests/test_io_sdf.cc +++ b/modules/io/tests/test_io_sdf.cc @@ -252,5 +252,17 @@ BOOST_AUTO_TEST_CASE(empty_dataheader_error_sdf) BOOST_CHECK_THROW(sdfh.Import(eh,"testfiles/sdf/empty_dataheader.sdf"), IOException); } +BOOST_AUTO_TEST_CASE(rcsb_modelserver_sdf) +{ + // Check that we can read invalid SDF files from the RCSB model server. + // These files have too short + mol::EntityHandle eh=mol::CreateEntity(); + EntityIOSDFHandler sdfh; + sdfh.Import(eh,"testfiles/sdf/1atg_C_ACT.sdf.gz"); + + // check success + BOOST_CHECK_EQUAL(eh.GetChainCount(), 1); +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/io/tests/testfiles/sdf/1atg_C_ACT.sdf.gz b/modules/io/tests/testfiles/sdf/1atg_C_ACT.sdf.gz new file mode 100644 index 0000000000000000000000000000000000000000..436ae0021cac71c13e18238d165b9ebde27cb208 GIT binary patch literal 427 zcmb2|=HQS#{xyYx*)XvrJ>EIq(K$q~I3<l?>ZF5yEd~N@?`yl-DyA&cWu4xucyj@} z>8pcZ?})7ETDaj-(k7|<cLh77!?UM|IQ_5tTyAwabf23<eA(2zypsQGs(n1Yf>;(E zusAg17n^{E&q={_gTp#%O0xeed^DblJpVEM!FoMzq2ozqoe@U6Iove&{afvQLgR?v zPL4?zG{qg2_7!*jf1)Jz@x`PAH~XH&EXw%UFKpN$d3>K#@BP})H>;;h%#WM9fJc6I z|1l5iGPiHH_<EK5V-;U(Ie$GC^W<&WT=6eYzbWpFoI8K*6Rw|!w4QER-8-#W+xUoP zztgO?kc4B?r|AAXByD;A!Pe62X&L(z<$h&e_#3wE)C(TduY0q!_ZOOfSt`fBd1coI zjn~%ttE1Vs$!i3^`)9o^aIeucpWj;7B{Q>^@4d4*i;W?^*-doAiZ|C9DmU<adSDRH zT+Q$=Geqgsy?wVMmd{ZwRX;vSYWBh(?UQ^&ax%a2iP(o!F`bTcp8RCXCgX1pgO}P{ p&I^!Vd0Xg+>??NZ+y$35y=lAVxQaveK=VDu%X5x}tFbUJ001wG&Vc{` literal 0 HcmV?d00001 -- GitLab