diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 7ddde3b2cf53f36807b659cb943f13207ed0a203..e550d96f83dfc160819f860055561534606b48fd 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -141,6 +141,7 @@ bool MMCifReader::OnBeginLoop(const StarLoopDesc& header) indices_[AUTH_SEQ_ID] = header.GetIndex("auth_seq_id"); indices_[PDBX_PDB_INS_CODE] = header.GetIndex("pdbx_PDB_ins_code"); indices_[PDBX_PDB_MODEL_NUM] = header.GetIndex("pdbx_PDB_model_num"); + indices_[FORMAL_CHARGE] = header.GetIndex("pdbx_formal_charge"); // post processing if (category_counts_[category_] > 0) { @@ -482,6 +483,7 @@ void MMCifReader::ParseAndAddAtom(const std::vector<StringRef>& columns) return; } Real occ = 1.00f, temp = 0; + int charge = 0; // formal charge; stays 0 unless a value is parsed from the row geom::Vec3 apos; for (int i = CARTN_X; i <= CARTN_Z; ++i) { @@ -505,6 +507,13 @@ void MMCifReader::ParseAndAddAtom(const std::vector<StringRef>& columns) "atom_site.B_iso_or_equiv"); } } + if (indices_[FORMAL_CHARGE] != -1) { // skip when the column index is -1 (presumably: item absent from the loop header) + String charge_s = columns[indices_[FORMAL_CHARGE]].str(); + if (charge_s != "?" && charge_s != ".") { // "?" and "." are not parsed as integers; charge keeps its default of 0 + charge = this->TryGetInt(columns[indices_[FORMAL_CHARGE]], + "atom_site.pdbx_formal_charge"); + } + } // determine element String s_ele(columns[indices_[TYPE_SYMBOL]].str()); @@ -665,6 +674,8 @@ void MMCifReader::ParseAndAddAtom(const std::vector<StringRef>& columns) ah.SetOccupancy(occ); + ah.SetCharge(charge); + // record type ah.SetHetAtom(indices_[GROUP_PDB] == -1 ? 
false : columns[indices_[GROUP_PDB]][0]=='H'); diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index 1f21c3928cac354f5f69197e763443d37a2dc9f6..f2f2d2dbd78069a778c6cd3ccab9d49abb7b0cb1 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -359,7 +359,7 @@ protected: private: /// \enum magic numbers of this class typedef enum { - MAX_ITEMS_IN_ROW=18 ///< count for possible items in a loop row + MAX_ITEMS_IN_ROW=19 ///< count for possible items in a loop row } MMCifMagicNos; /// \enum items of the atom_site category @@ -381,7 +381,8 @@ private: B_ISO_OR_EQUIV, PDBX_PDB_INS_CODE, GROUP_PDB, ///< record name - PDBX_PDB_MODEL_NUM ///< model no. (especially NMR structures) + PDBX_PDB_MODEL_NUM,///< model no. (especially NMR structures) + FORMAL_CHARGE ///< formal charge (pdbx_formal_charge) } AtomSiteItems; /// \enum items of the entity category diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc index e03eb0073be2133e959420e390e3c6f07ec07fde..6d81b46033ab1143be596f7464020706a4645abd 100644 --- a/modules/io/tests/test_mmcif_reader.cc +++ b/modules/io/tests/test_mmcif_reader.cc @@ -1616,5 +1616,22 @@ BOOST_AUTO_TEST_CASE(mmcif_atom_site_B_iso_or_equiv_tests) BOOST_TEST_MESSAGE(" done."); } +BOOST_AUTO_TEST_CASE(mmcif_formal_charge) +{ + mol::EntityHandle eh = mol::CreateEntity(); + std::ifstream s("testfiles/mmcif/4C79_charged.cif"); + IOProfile profile; + MMCifReader mmcif_p(s, eh, profile); + mmcif_p.Parse(); + + BOOST_CHECK_EQUAL(eh.FindAtom("A", 49, "OE2").GetCharge(), -1); // -1 in the test file + BOOST_CHECK_EQUAL(eh.FindAtom("A", 49, "OE1").GetCharge(), 0); // '?' in the test file, expected to read as 0 + BOOST_CHECK_EQUAL(eh.FindAtom("A", 49, "CA").GetCharge(), 0); // explicit 0 in the test file + BOOST_CHECK_EQUAL(eh.FindAtom("A", 49, "CB").GetCharge(), 0); // '.' in the test file, expected to read as 0 
+ BOOST_CHECK_EQUAL(eh.FindAtom("C", 1, "ZN").GetCharge(), 2); + BOOST_CHECK_EQUAL(eh.FindAtom("D", 1, "NA").GetCharge(), 1); + +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/io/tests/testfiles/mmcif/4C79_charged.cif b/modules/io/tests/testfiles/mmcif/4C79_charged.cif new file mode 100644 index 0000000000000000000000000000000000000000..c5089608ff706068345942fb2240a2f7394c5a52 --- /dev/null +++ b/modules/io/tests/testfiles/mmcif/4C79_charged.cif @@ -0,0 +1,289 @@ +data_4C79 +# taken and modified from 4C79.cif +_entry.id 4C79 +# +loop_ +_entity.id +_entity.type +_entity.src_method +_entity.pdbx_description +_entity.formula_weight +_entity.pdbx_number_of_molecules +_entity.pdbx_ec +_entity.pdbx_mutation +_entity.pdbx_fragment +_entity.details +1 polymer man SMOOTHENED 22144.174 2 ? ? 'CYSTEINE-RICH DOMAIN (CRD), RESIDUES 28-210' ? +2 non-polymer syn 'ZINC ION' 65.409 1 ? ? ? ? +3 non-polymer syn 'SODIUM ION' 22.990 2 ? ? ? ? +4 water nat water 18.015 48 ? ? ? ? +# +_entity_poly.entity_id 1 +_entity_poly.type 'polypeptide(L)' +_entity_poly.nstd_linkage no +_entity_poly.nstd_monomer no +_entity_poly.pdbx_seq_one_letter_code +;MAVILHPNETIFNDFCKKSTTCEVLKYNTCLGSPLPYTHTSLILAEDSETQEEAFEKLAMWSGLRNAPRCWAVIQPLLCA +VYMPKCENGKVELPSQHLCQATRNPCSIVERERGWPNFLKCENKEQFPKGCQNEVQKLKFNTSGQCEAPLVKTDIQASWY +KDVEGCGIQCDNPLFTEDEHSDMHKLEHHHHHH +; +_entity_poly.pdbx_seq_one_letter_code_can +;MAVILHPNETIFNDFCKKSTTCEVLKYNTCLGSPLPYTHTSLILAEDSETQEEAFEKLAMWSGLRNAPRCWAVIQPLLCA +VYMPKCENGKVELPSQHLCQATRNPCSIVERERGWPNFLKCENKEQFPKGCQNEVQKLKFNTSGQCEAPLVKTDIQASWY +KDVEGCGIQCDNPLFTEDEHSDMHKLEHHHHHH +; +_entity_poly.pdbx_strand_id A +_entity_poly.pdbx_target_identifier ? 
+# +loop_ +_entity_poly_seq.entity_id +_entity_poly_seq.num +_entity_poly_seq.mon_id +_entity_poly_seq.hetero +1 1 MET n +1 2 ALA n +1 3 VAL n +1 4 ILE n +1 5 LEU n +1 6 HIS n +1 7 PRO n +1 8 ASN n +1 9 GLU n +1 10 THR n +1 11 ILE n +1 12 PHE n +1 13 ASN n +1 14 ASP n +1 15 PHE n +1 16 CYS n +1 17 LYS n +1 18 LYS n +1 19 SER n +1 20 THR n +1 21 THR n +1 22 CYS n +1 23 GLU n +1 24 VAL n +1 25 LEU n +1 26 LYS n +1 27 TYR n +1 28 ASN n +1 29 THR n +1 30 CYS n +1 31 LEU n +1 32 GLY n +1 33 SER n +1 34 PRO n +1 35 LEU n +1 36 PRO n +1 37 TYR n +1 38 THR n +1 39 HIS n +1 40 THR n +1 41 SER n +1 42 LEU n +1 43 ILE n +1 44 LEU n +1 45 ALA n +1 46 GLU n +1 47 ASP n +1 48 SER n +1 49 GLU n +1 50 THR n +1 51 GLN n +1 52 GLU n +1 53 GLU n +1 54 ALA n +1 55 PHE n +1 56 GLU n +1 57 LYS n +1 58 LEU n +1 59 ALA n +1 60 MET n +1 61 TRP n +1 62 SER n +1 63 GLY n +1 64 LEU n +1 65 ARG n +1 66 ASN n +1 67 ALA n +1 68 PRO n +1 69 ARG n +1 70 CYS n +1 71 TRP n +1 72 ALA n +1 73 VAL n +1 74 ILE n +1 75 GLN n +1 76 PRO n +1 77 LEU n +1 78 LEU n +1 79 CYS n +1 80 ALA n +1 81 VAL n +1 82 TYR n +1 83 MET n +1 84 PRO n +1 85 LYS n +1 86 CYS n +1 87 GLU n +1 88 ASN n +1 89 GLY n +1 90 LYS n +1 91 VAL n +1 92 GLU n +1 93 LEU n +1 94 PRO n +1 95 SER n +1 96 GLN n +1 97 HIS n +1 98 LEU n +1 99 CYS n +1 100 GLN n +1 101 ALA n +1 102 THR n +1 103 ARG n +1 104 ASN n +1 105 PRO n +1 106 CYS n +1 107 SER n +1 108 ILE n +1 109 VAL n +1 110 GLU n +1 111 ARG n +1 112 GLU n +1 113 ARG n +1 114 GLY n +1 115 TRP n +1 116 PRO n +1 117 ASN n +1 118 PHE n +1 119 LEU n +1 120 LYS n +1 121 CYS n +1 122 GLU n +1 123 ASN n +1 124 LYS n +1 125 GLU n +1 126 GLN n +1 127 PHE n +1 128 PRO n +1 129 LYS n +1 130 GLY n +1 131 CYS n +1 132 GLN n +1 133 ASN n +1 134 GLU n +1 135 VAL n +1 136 GLN n +1 137 LYS n +1 138 LEU n +1 139 LYS n +1 140 PHE n +1 141 ASN n +1 142 THR n +1 143 SER n +1 144 GLY n +1 145 GLN n +1 146 CYS n +1 147 GLU n +1 148 ALA n +1 149 PRO n +1 150 LEU n +1 151 VAL n +1 152 LYS n +1 153 THR n +1 154 
ASP n +1 155 ILE n +1 156 GLN n +1 157 ALA n +1 158 SER n +1 159 TRP n +1 160 TYR n +1 161 LYS n +1 162 ASP n +1 163 VAL n +1 164 GLU n +1 165 GLY n +1 166 CYS n +1 167 GLY n +1 168 ILE n +1 169 GLN n +1 170 CYS n +1 171 ASP n +1 172 ASN n +1 173 PRO n +1 174 LEU n +1 175 PHE n +1 176 THR n +1 177 GLU n +1 178 ASP n +1 179 GLU n +1 180 HIS n +1 181 SER n +1 182 ASP n +1 183 MET n +1 184 HIS n +1 185 LYS n +1 186 LEU n +1 187 GLU n +1 188 HIS n +1 189 HIS n +1 190 HIS n +1 191 HIS n +1 192 HIS n +1 193 HIS n +# +_struct.entry_id 4C79 +_struct.title 'Crystal structure of the Smoothened CRD, native' +_struct.pdbx_descriptor SMOOTHENED +_struct.pdbx_model_details ? +_struct.pdbx_CASP_flag ? +_struct.pdbx_model_type_details ? +# +_struct_keywords.entry_id 4C79 +_struct_keywords.pdbx_keywords 'SIGNALING PROTEIN' +_struct_keywords.text 'SIGNALING PROTEIN' +# +loop_ +_atom_site.group_PDB +_atom_site.id +_atom_site.type_symbol +_atom_site.label_atom_id +_atom_site.label_alt_id +_atom_site.label_comp_id +_atom_site.label_asym_id +_atom_site.label_entity_id +_atom_site.label_seq_id +_atom_site.pdbx_PDB_ins_code +_atom_site.Cartn_x +_atom_site.Cartn_y +_atom_site.Cartn_z +_atom_site.occupancy +_atom_site.B_iso_or_equiv +_atom_site.pdbx_formal_charge +_atom_site.auth_seq_id +_atom_site.auth_comp_id +_atom_site.auth_asym_id +_atom_site.auth_atom_id +_atom_site.pdbx_PDB_model_num +ATOM 262 N N . GLU A 1 49 ? -25.812 5.207 -4.954 1.00 35.58 ? 75 GLU A N 1 +ATOM 263 C CA . GLU A 1 49 ? -26.815 4.149 -4.979 1.00 36.58 0 75 GLU A CA 1 +ATOM 264 C C . GLU A 1 49 ? -27.600 4.155 -6.288 1.00 35.91 ? 75 GLU A C 1 +ATOM 265 O O . GLU A 1 49 ? -28.206 3.150 -6.663 1.00 35.94 ? 75 GLU A O 1 +ATOM 266 C CB . GLU A 1 49 ? -27.774 4.284 -3.794 1.00 38.91 . 75 GLU A CB 1 +ATOM 267 C CG . GLU A 1 49 ? -27.114 4.163 -2.426 1.00 40.64 ? 75 GLU A CG 1 +ATOM 268 C CD . GLU A 1 49 ? -26.931 2.725 -1.965 1.00 42.04 ? 75 GLU A CD 1 +ATOM 269 O OE1 . GLU A 1 49 ? -26.905 1.807 -2.812 1.00 42.07 ? 
75 GLU A OE1 1 +ATOM 270 O OE2 . GLU A 1 49 ? -26.811 2.516 -0.740 1.00 42.97 -1 75 GLU A OE2 1 +HETATM 1881 ZN ZN . ZN C 2 . ? -29.714 -4.622 -22.478 1.00 31.76 2 1159 ZN A ZN 1 +HETATM 1882 NA NA . NA D 3 . ? -23.050 1.721 -4.584 1.00 26.51 1 1160 NA A NA 1 +# +loop_ +_pdbx_entity_nonpoly.entity_id +_pdbx_entity_nonpoly.name +_pdbx_entity_nonpoly.comp_id +2 'ZINC ION' ZN +3 'SODIUM ION' NA +4 water HOH +#