diff --git a/modules/io/src/mol/mmcif_writer.cc b/modules/io/src/mol/mmcif_writer.cc index 495e79a8c7d554f39025ffb705fc61bd2f77f6dc..23a361a6dacdb0b42c889c38b8b11def18232ad2 100644 --- a/modules/io/src/mol/mmcif_writer.cc +++ b/modules/io/src/mol/mmcif_writer.cc @@ -878,9 +878,10 @@ namespace { } } + bool all_hetatm = entity_info.type == "non-polymer"; for(auto at: at_list) { // group_PDB - if(at.IsHetAtom()) { + if(at.IsHetAtom() || all_hetatm) { at_data[0] = ost::io::StarWriterValue::FromString("HETATM"); } else { at_data[0] = ost::io::StarWriterValue::FromString("ATOM"); @@ -1182,8 +1183,25 @@ namespace { String poly_type = poly_types[i]; if(poly_chains[i]->size() <= 2) { // must have length of at least 3 to be polymer + // feed them as separate non-polymers type = "non-polymer"; poly_type = ""; + String branch_type = ""; + for(auto r: *poly_chains[i]) { + T tmp; + tmp.push_back(r); + String chain_name = chain_name_gen.Get(); + int entity_id = SetupEntity(chain_name, + type, + poly_type, + branch_type, + tmp, + false, + entity_info); + Feed_atom_site(atom_site, chain_name, entity_id+1, entity_info[entity_id], + tmp); + } + continue; } String branch_type = ""; String chain_name = chain_name_gen.Get(); diff --git a/modules/io/tests/test_mmcif_writer.cc b/modules/io/tests/test_mmcif_writer.cc index 353ba674788071d362c78b42035bfe823785cd4b..8661a7327983badb4c0d3df44beb9eb073000030 100644 --- a/modules/io/tests/test_mmcif_writer.cc +++ b/modules/io/tests/test_mmcif_writer.cc @@ -20,10 +20,11 @@ #define BOOST_TEST_DYN_LINK #include <boost/test/unit_test.hpp> -#include <ost/platform.hh> #include <ost/conop/conop.hh> -#include <ost/mol/mol.hh> +#include <ost/conop/heuristic.hh> #include <ost/io/mol/mmcif_writer.hh> +#include <ost/mol/mol.hh> +#include <ost/platform.hh> using namespace ost; using namespace ost::io; @@ -47,7 +48,6 @@ conop::CompoundLibPtr SetDefaultCompoundLib() { BOOST_AUTO_TEST_CASE(mmcif_writer_entity1) { BOOST_TEST_MESSAGE(" Running mmcif_writer_entity1 tests..."); - /* Make sure molecular entities in mmCIF files written by OST start counting at ID 1. This is not enforced by the mmCIF format definition, but common @@ -60,9 +60,7 @@ BOOST_AUTO_TEST_CASE(mmcif_writer_entity1) mol::ChainHandle ch=edi.InsertChain("A"); mol::ResidueHandle r=edi.AppendResidue(ch, "GLY"); mol::AtomHandle a=edi.InsertAtom(r, "CA", geom::Vec3(32.0, -128.0, -2.5), - "C"); - a.SetOccupancy(1.0); - a.SetBFactor(128.0); + "C", 1.0, 128.0); edi.SetChainType(ch, mol::CHAINTYPE_UNKNOWN); // Create mmCIF stream @@ -73,10 +71,90 @@ BOOST_AUTO_TEST_CASE(mmcif_writer_entity1) // Check if entity starts with 1, either by reading mmCIF or "grep" String s=out.str(); - BOOST_CHECK_EQUAL( - s.substr(0, 53), - "data_test\nloop_\n_entity.id\n_entity.type\n1 non-polymer" - ); + BOOST_CHECK_NE(s.find("_entity.id\n_entity.type\n1 non-polymer"), + String::npos); + + BOOST_TEST_MESSAGE(" done."); +} + +BOOST_AUTO_TEST_CASE(mmcif_writer_poly_vs_non_poly) +{ + BOOST_TEST_MESSAGE(" Running mmcif_writer_poly_vs_non_poly tests..."); + /* + Go for small polymers that are not polymer... the story of 2 amino acids (to + be handled like RCSB as 2 separated chains) plus how the same thing works + for nucleic acids and branched molecules (sugars). + */ + + // Polypeptide: 2aa in a chain are 2 separated entities for RCSB (check 1E8K) + mol::EntityHandle ent=mol::CreateEntity(); + mol::XCSEditor edi=ent.EditXCS(); + mol::ChainHandle ch=edi.InsertChain("A"); + edi.SetChainType(ch, mol::CHAINTYPE_POLY_PEPTIDE_L); + // ALA + mol::ResidueHandle r1=edi.AppendResidue(ch, "ALA"); // is_hetatm=false + edi.InsertAtom(r1, "N", geom::Vec3(44.987, 17.389, 12.362), "N", 0.81, + 25.57); + edi.InsertAtom(r1, "CA", geom::Vec3(45.936, 16.434, 12.890), "C", 0.81, + 28.21); + edi.InsertAtom(r1, "C", geom::Vec3(47.196, 17.227, 13.152), "C", 0.81, + 33.78); + edi.InsertAtom(r1, "O", geom::Vec3(47.506, 18.153, 12.401), "O", 0.81, + 23.02); + edi.InsertAtom(r1, "CB", geom::Vec3(46.244, 15.293, 11.961), "C", 0.81, + 29.85); + // PRO + mol::ResidueHandle r2=edi.AppendResidue(ch, "PRO"); // is_hetatm=false + edi.InsertAtom(r2, "N", geom::Vec3(47.953, 16.910, 14.229), "N", 0.81, + 32.19 ); + edi.InsertAtom(r2, "CA", geom::Vec3(47.673, 15.829, 15.187), "C", 0.81, + 33.89 ); + edi.InsertAtom(r2, "C", geom::Vec3(46.564, 16.052, 16.233), "C", 0.81, + 37.97 ); + edi.InsertAtom(r2, "O", geom::Vec3(46.059, 17.169, 16.417), "O", 0.81, + 34.43 ); + edi.InsertAtom(r2, "CB", geom::Vec3(49.054, 15.755, 15.880), "C", 0.81, + 36.60 ); + edi.InsertAtom(r2, "CG", geom::Vec3(49.357, 17.213, 16.030), "C", 0.81, + 34.77 ); + edi.InsertAtom(r2, "CD", geom::Vec3(49.098, 17.714, 14.637), "C", 0.81, + 34.62 ); + edi.InsertAtom(r2, "OXT", geom::Vec3(46.144, 15.129, 16.950), "O", 0.81, + 34.03 ); + conop::HeuristicProcessor heu_proc; + // Make sure that the two residues r1, r2 are actually connected + heu_proc.Process(ent); + BOOST_CHECK_EQUAL(mol::InSequence(r1, r2), true); + + // Create mmCIF stream + MMCifWriter writer; + writer.SetStructure(ent, SetDefaultCompoundLib(), false); + std::stringstream out; + writer.Write("test", out); + + // Check that the mmCIF output contains 2 non-polymer entities + String s=out.str(); + BOOST_CHECK_NE( + s.find("loop_\n_entity.id\n_entity.type\n1 non-polymer\n2 non-polymer"), + String::npos); + BOOST_CHECK_NE( + s.find("loop_\n_struct_asym.id\n_struct_asym.entity_id\nA 1\nB 2"), + String::npos); + BOOST_CHECK_NE(s.find("HETATM N N ALA"), String::npos); + BOOST_CHECK_NE(s.find("HETATM C CA ALA"), String::npos); + BOOST_CHECK_NE(s.find("HETATM C C ALA"), String::npos); + BOOST_CHECK_NE(s.find("HETATM O O ALA"), String::npos); + BOOST_CHECK_NE(s.find("HETATM C CB ALA"), String::npos); + BOOST_CHECK_NE(s.find("HETATM N N PRO"), String::npos); + BOOST_CHECK_NE(s.find("HETATM C CA PRO"), String::npos); + BOOST_CHECK_NE(s.find("HETATM C C PRO"), String::npos); + BOOST_CHECK_NE(s.find("HETATM O O PRO "), String::npos); + BOOST_CHECK_NE(s.find("HETATM C CB PRO"), String::npos); + BOOST_CHECK_NE(s.find("HETATM C CG PRO"), String::npos); + BOOST_CHECK_NE(s.find("HETATM C CD PRO"), String::npos); + BOOST_CHECK_NE(s.find("HETATM O OXT PRO"), String::npos); + + // write mmCIF, check atoms are marked HETATM BOOST_TEST_MESSAGE(" done."); }