diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index 75f42288891fb6cb1a1b5aa83e9de7c2710b3fb5..6fd6997721b30d45909c353be39c9cbde1aa5274 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -847,7 +847,8 @@ of the annotation available. Since this function is at the moment mainly used to create biounits from mmCIF files to be saved as PDBs, the function assumes that the - :class:`~ost.mol.ChainType` properties are set correctly. + :class:`~ost.mol.ChainType` properties are set correctly. For a more + mmCIF-style of doing things read this: :ref:`Biounits <Biounits>` :param asu: Asymmetric unit to work on. Should be created from a mmCIF file. @@ -1333,6 +1334,20 @@ of the annotation available. See :attr:`bond_order` +Biounits +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. _Biounits: + +Biological assemblies, i.e. biounits, are an integral part of mmCIF files and +their construction is fully defined in :class:`MMCifInfoBioUnit`. +:func:`MMCifInfoBioUnit.PDBize` provides one possibility to construct such biounits +with compatibility with the PDB format in mind. That is single character chain +names, dumping all ligands in one chain etc. For a more mmCIF-style way of +constructing biounits, check out :func:`ost.mol.alg.CreateBU` in the +*ost.mol.alg* module. + + .. |exptl.method| replace:: ``exptl.method`` .. _exptl.method: https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_exptl.method.html diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst index 2af30b8c887130e9e1068c526f321ecd971df1ea..3735cce053abfab2d9c34fcaab5d307e080ccd08 100644 --- a/modules/mol/alg/doc/molalg.rst +++ b/modules/mol/alg/doc/molalg.rst @@ -1414,3 +1414,71 @@ API :type ent: :class:`~ost.mol.EntityHandle` :param lib: Compound library :type lib: :class:`~ost.conop.CompoundLib` + + + +Biounits +-------------------------------------------------------------------------------- + +Biological assemblies, i.e. biounits, are an integral part of mmCIF files and +their construction is fully defined in :class:`ost.io.MMCifInfoBioUnit`. +:func:`ost.io.MMCifInfoBioUnit.PDBize` provides one possibility to construct +such biounits with compatibility with the PDB format in mind. That is single +character chain names, dumping all ligands in one chain etc. Here we provide a +more mmCIF-style way of constructing biounits. This can either be done starting +from a :class:`ost.io.MMCifInfoBioUnit` or the derived +:class:`ost.mol.alg.BUInfo`. The latter is a minimalistic representation of +:class:`ost.io.MMCifInfoBioUnit` and can be serialized to a byte string. + +.. class:: BUInfo(mmcif_buinfo): + + Preprocesses data from :class:`ost.io.MMCifInfoBioUnit` that are required + to construct a biounit from an assymetric unit. Can be serialized. + + :param mmcif_buinfo: Biounit definition + :type mmcif_buinfo: :class:`ost.io.MMCifInfoBioUnit` + + .. method:: ToBytes() + + :returns: A byte string from which the object can be reconstructed. + + .. staticmethod:: FromBytes(byte_string) + + :param byte_string: Can be created with :func:`ToBytes` + :returns: A :class:`BUInfo` object + +.. function:: CreateBU(asu, bu_info) + + Constructs a biounit given an assymetric unit and transformation + information. The following properties are copied from the assymetric + unit and are expected to be set (this is the case if you use + :func:`ost.io.LoadMMCIF` for the assymetric unit): + + * Chain level: + + * Chain type (see :attr:`ost.mol.ChainHandle.type`) + + * Residue level: + + * Chem type (see :attr:`ost.mol.ResidueHandle.chem_type`) + * Chem class (:attr:`ost.mol.ResidueHandle.chem_class`) + * One letter code (see :attr:`ost.mol.ResidueHandle.one_letter_code`) + * Secondary structure (see :attr:`ost.mol.ResidueHandle.sec_structure`) + * IsProtein and IsLigand properties (see :attr:`ost.mol.ResidueHandle.is_protein`/:attr:`ost.mol.ResidueHandle.is_ligand`) + + Each chain in the returned biounit can be referenced back to the + assymetric unit as they follow a standardised naming scheme: + <*idx*>.<*asu_cname*>, where *asu_cname* is the chain name in the assymetric + unit and *idx* is the nth occurence of that chain in the biounit with + one based indexing. There is a quirk though to be more consistent with the + biounits that you download from RCSB. An index of 1, for example 1.A, + is reserved for the original AU chain with identity transform (read: no + transform) applied. If a certain AU chain only occurs with an actual + transform applied, numbering starts at 2. + + :param asu: The assymetric unit + :type asu: :class:`ost.mol.EntityHandle` + :param bu_info: Info object + :type bu_info: :class:`MMCifInfoBioUnit`/:class:`BUInfo` + :returns: A :class:`ost.mol.EntityHandle` of the requested biounit + diff --git a/modules/mol/alg/src/biounit.cc b/modules/mol/alg/src/biounit.cc index 2ff12d7dd7950e63452ab8599290b338902e2b87..74efb52994ba41c3246641859b00128b85c79ba3 100644 --- a/modules/mol/alg/src/biounit.cc +++ b/modules/mol/alg/src/biounit.cc @@ -237,9 +237,15 @@ ost::mol::EntityHandle CreateBU(const ost::mol::EntityHandle& asu, ent.SetName(asu.GetName()); ost::mol::XCSEditor ed = ent.EditXCS(mol::BUFFERED_EDIT); - // For chain naming. First occurence: 1.<au_cname>, Second: 2.<au_cname> etc. + // For chain naming. First copy with transformation: 2.<au_cname>, second + // 3.<au_cname> etc. std::map<String, int> chain_counter; + // The name 1.<au_cname> is reserved for that particular AU chain with + // identity transform, i.e. the copy of the actual AU chain. We need to keep + // track of this as there can only be one. + std::set<String> au_chain_copies; + const std::vector<std::vector<String> >& au_chains = bu_info.GetAUChains(); const std::vector<std::vector<geom::Mat4> >& transforms = bu_info.GetTransformations(); @@ -259,17 +265,44 @@ ost::mol::EntityHandle CreateBU(const ost::mol::EntityHandle& asu, // process all transformations for(uint t_idx = 0; t_idx < transforms[chain_intvl].size(); ++t_idx) { const geom::Mat4& m = transforms[chain_intvl][t_idx]; + + // check if m is identity matrix => no transformation applied + bool is_identity = true; + geom::Mat4 identity_matrix = geom::Mat4::Identity(); + const Real* m_data = m.Data(); + const Real* identity_data = identity_matrix.Data(); + for(int i = 0; i < 16; ++i) { + if(std::abs(m_data[i] - identity_data[i]) > 1e-5) { + is_identity = false; + break; + } + } + // key: au_at.GetHashCode, value: bu_at // required for bond buildup in the end std::map<long, AtomHandle> atom_mapper; for(uint c_idx = 0; c_idx < au_chains[chain_intvl].size(); ++c_idx) { String au_cname = au_chains[chain_intvl][c_idx]; - if(chain_counter.find(au_cname) == chain_counter.end()) { - chain_counter[au_cname] = 1; - } + std::stringstream bu_cname_ss; - bu_cname_ss << chain_counter[au_cname] << '.' << au_cname; - chain_counter[au_cname] += 1; + if(is_identity) { + if(au_chain_copies.find(au_cname) != au_chain_copies.end()) { + std::stringstream err; + err<<"Try to insert copy of AU chain "<<au_cname<<" with identity "; + err<<"transform, i.e. copy the raw coordinates. This has already "; + err<<"been done for this AU chain and there can only be one."; + throw ost::Error(err.str()); + } + bu_cname_ss << "1." << au_cname; // 1.<au_cname> reserved for AU chain + // without transformation + au_chain_copies.insert(au_cname); + } else { + if(chain_counter.find(au_cname) == chain_counter.end()) { + chain_counter[au_cname] = 2; + } + bu_cname_ss << chain_counter[au_cname] << '.' << au_cname; + chain_counter[au_cname] += 1; + } ost::mol::ChainHandle asu_ch = asu.FindChain(au_cname); if(!asu_ch.IsValid()) { std::stringstream ss;