From 1645c2c9185d77c046043d2458d1dfca875d233d Mon Sep 17 00:00:00 2001
From: Xavier Robin <xavalias-github@xavier.robin.name>
Date: Mon, 4 Sep 2023 15:39:59 +0200
Subject: [PATCH] fix: allow short count lines

The RCSB ModelServer SDF files contain counts lines with only 33
characters. Although invalid, they can be read by RDKit (which only
requires 6 characters). This commit emulates the behavior of RDKit.
---
 modules/io/src/mol/sdf_reader.cc              |  18 ++++++++++++++----
 modules/io/tests/test_io_sdf.cc               |  12 ++++++++++++
 .../io/tests/testfiles/sdf/1atg_C_ACT.sdf.gz  | Bin 0 -> 427 bytes
 3 files changed, 26 insertions(+), 4 deletions(-)
 create mode 100644 modules/io/tests/testfiles/sdf/1atg_C_ACT.sdf.gz

diff --git a/modules/io/src/mol/sdf_reader.cc b/modules/io/src/mol/sdf_reader.cc
index 381f5cbe9..660a9dcb5 100644
--- a/modules/io/src/mol/sdf_reader.cc
+++ b/modules/io/src/mol/sdf_reader.cc
@@ -182,12 +182,22 @@ void SDFReader::ParseHeader(const String& line, int line_num,
       break;
     case 4:  // counts line
     {
-      if (line.length() < 39) {
-        String msg="Bad counts line %d: Not correct number of characters on "
-                   "the line: %i (should be at least 39)";
+      String version_str;
+      if (line.length() < 6) {
+        String msg="Bad counts line %d: too short (%i characters, "
+                   "should be at least 6 or 39)";
         throw IOException(str(format(msg) % line_num % line.length()));
       }
-      String version_str=line.substr(34, 5);
+      else if (line.length() < 39) {
+        String msg="Bad counts line %d: too short (%i characters,  "
+                   "should be at least 39). "
+                   "Proceeding assuming V2000 format.";
+        LOG_WARNING(str(format(msg) % line_num % line.length()));
+        version_str="V2000";
+      }
+      else {
+        version_str=line.substr(34, 5);
+      }
       if (version_str == "V2000" || version_str == "V3000") {
         version_=version_str;
       }
diff --git a/modules/io/tests/test_io_sdf.cc b/modules/io/tests/test_io_sdf.cc
index df84041b0..68009e9c9 100644
--- a/modules/io/tests/test_io_sdf.cc
+++ b/modules/io/tests/test_io_sdf.cc
@@ -252,5 +252,17 @@ BOOST_AUTO_TEST_CASE(empty_dataheader_error_sdf)
   BOOST_CHECK_THROW(sdfh.Import(eh,"testfiles/sdf/empty_dataheader.sdf"), IOException);
 }
 
+BOOST_AUTO_TEST_CASE(rcsb_modelserver_sdf)
+{
+  // Check that we can read invalid SDF files from the RCSB model server.
+  // These files have a counts line shorter than the required 39 characters.
+  mol::EntityHandle eh=mol::CreateEntity();
+  EntityIOSDFHandler sdfh;
+  sdfh.Import(eh,"testfiles/sdf/1atg_C_ACT.sdf.gz");
+
+  // the import must not throw and must yield exactly one chain
+  BOOST_CHECK_EQUAL(eh.GetChainCount(), 1);
+}
+
 
 BOOST_AUTO_TEST_SUITE_END();
diff --git a/modules/io/tests/testfiles/sdf/1atg_C_ACT.sdf.gz b/modules/io/tests/testfiles/sdf/1atg_C_ACT.sdf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..436ae0021cac71c13e18238d165b9ebde27cb208
GIT binary patch
literal 427
zcmb2|=HQS#{xyYx*)XvrJ>EIq(K$q~I3<l?>ZF5yEd~N@?`yl-DyA&cWu4xucyj@}
z>8pcZ?})7ETDaj-(k7|<cLh77!?UM|IQ_5tTyAwabf23<eA(2zypsQGs(n1Yf>;(E
zusAg17n^{E&q={_gTp#%O0xeed^DblJpVEM!FoMzq2ozqoe@U6Iove&{afvQLgR?v
zPL4?zG{qg2_7!*jf1)Jz@x`PAH~XH&EXw%UFKpN$d3>K#@BP})H>;;h%#WM9fJc6I
z|1l5iGPiHH_<EK5V-;U(Ie$GC^W<&WT=6eYzbWpFoI8K*6Rw|!w4QER-8-#W+xUoP
zztgO?kc4B?r|AAXByD;A!Pe62X&L(z<$h&e_#3wE)C(TduY0q!_ZOOfSt`fBd1coI
zjn~%ttE1Vs$!i3^`)9o^aIeucpWj;7B{Q>^@4d4*i;W?^*-doAiZ|C9DmU<adSDRH
zT+Q$=Geqgsy?wVMmd{ZwRX;vSYWBh(?UQ^&ax%a2iP(o!F`bTcp8RCXCgX1pgO}P{
p&I^!Vd0Xg+>??NZ+y$35y=lAVxQaveK=VDu%X5x}tFbUJ001wG&Vc{`

literal 0
HcmV?d00001

-- 
GitLab