diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index c53a8b858d090bd3233170164482d40d50cbf1f9..2eafc9c9bd1f8a49c74902504c9df244a9a6b550 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -798,7 +798,7 @@ of the annotation available. See :attr:`operationsintervalls` - .. function:: PDBize(asu, seqres=None, min_polymer_size=10, transformation=False) + .. function:: PDBize(asu, seqres=None, min_polymer_size=None, transformation=False, peptide_min_size=10, nucleicacid_min_size=10, saccharide_min_size=10) Returns the biological assembly (bio unit) for an entity. The new entity created is well suited to be saved as a PDB file. Therefore the function @@ -810,7 +810,8 @@ of the annotation available. - Each polymer gets its own chain, named A-Z 0-9 a-z. - The description of non-polymer chains will be put into a generic string property called description on the residue level. - - Ligands that resemble a polymer but have less than *min_polymer_size* + - Ligands that resemble a polymer but have less than *min_polymer_size* / + *peptide_min_size* / *nucleicacid_min_size* / *saccharide_min_size* residues are assigned the same numeric residue number. The residues are distinguished by insertion code. - Sometimes bio units exceed the coordinate system storable in a PDB file. @@ -830,11 +831,21 @@ of the annotation available. :type seqres: :class:`~ost.seq.SequenceList` :param min_polymer_size: The minimal number of residues a polymer needs to get its own chain. Everything below that number will be sorted into the - ligand chain. + ligand chain. Overrides *peptide_min_size*, *nucleicacid_min_size* and + *saccharide_min_size* if set to a value different than None. :type min_polymer_size: int :param transformation: If set, return the transformation matrix used to move the bounding box of the bio unit to the lower left corner. 
:type transformation: :class:`bool` + :param peptide_min_size: Minimal size to get an individual chain for a + polypeptide. Is overridden by *min_polymer_size*. + :type peptide_min_size: :class:`int` + :param nucleicacid_min_size: Minimal size to get an individual chain for a + polynucleotide. Is overridden by *min_polymer_size*. + :type nucleicacid_min_size: :class:`int` + :param saccharide_min_size: Minimal size to get an individual chain for an + oligosaccharide or polysaccharide. Is overridden by *min_polymer_size*. + :type saccharide_min_size: :class:`int` .. class:: MMCifInfoStructDetails diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index cf8063d011627d477fd7401d004d7b6664a621f7..a0a29a6a43a98691faf1f1b54650f1fff5a0600a 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -358,9 +358,15 @@ def LoadMMCIF(filename, fault_tolerant=None, calpha_only=None, profile='DEFAULT' # arguement is the usual 'self'. # documentation for this function was moved to mmcif.rst, # MMCifInfoBioUnit.PDBize, since this function is not included in SPHINX. 
-def _PDBize(biounit, asu, seqres=None, min_polymer_size=10, - transformation=False): - pdbizer = mol.alg.PDBize(min_polymer_size=min_polymer_size) +def _PDBize(biounit, asu, seqres=None, min_polymer_size=None, + transformation=False, peptide_min_size=10, nucleicacid_min_size=10, + saccharide_min_size=10): + if min_polymer_size is not None: + pdbizer = mol.alg.PDBize(min_polymer_size=min_polymer_size) + else: + pdbizer = mol.alg.PDBize(peptide_min_size=peptide_min_size, + nucleicacid_min_size=nucleicacid_min_size, + saccharide_min_size=saccharide_min_size) chains = biounit.GetChainList() c_intvls = biounit.GetChainIntervalList() diff --git a/modules/mol/alg/pymod/wrap_mol_alg.cc b/modules/mol/alg/pymod/wrap_mol_alg.cc index fd5b6d94de273dc57317e270d58596534fb5131f..d36877c0fb9024c9a200ca0e327fd9b68f900cbd 100644 --- a/modules/mol/alg/pymod/wrap_mol_alg.cc +++ b/modules/mol/alg/pymod/wrap_mol_alg.cc @@ -439,7 +439,10 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) class_<mol::alg::PDBize>("PDBize", - init<int>(arg("min_polymer_size")=10)) + init<int,int,int>((arg("peptide_min_size"), + arg("nucleicacid_min_size"), + arg("saccharide_min_size")))) + .def(init<int>(arg("min_polymer_size")=10)) .def("Add", &mol::alg::PDBize::Add, (arg("asu"), arg("transformations"), arg("seqres"))) .def("Finish", &mol::alg::PDBize::Finish, arg("shift_to_fit")=true) diff --git a/modules/mol/alg/src/pdbize.cc b/modules/mol/alg/src/pdbize.cc index 84853c6f0d20502c94d5bfb38f110ff3abf7b46d..c53749ff4bb06dd589940a1a057536a36b6a7558 100644 --- a/modules/mol/alg/src/pdbize.cc +++ b/modules/mol/alg/src/pdbize.cc @@ -76,12 +76,19 @@ void PDBize::Add(EntityView asu, const geom::Mat4List& transforms, e2 =asu.GetChainList().end(); j != e2; ++j) { ChainView chain = *j; int chain_length = chain.GetResidueCount(); - if (chain_length < min_polymer_size_ && seqres.IsValid()) { + if (((chain.IsPolypeptide() && chain_length < peptide_min_size_) || + (chain.IsPolynucleotide() && chain_length < nucleicacid_min_size_) 
|| + ((chain.IsOligosaccharide() || chain.IsPolysaccharide()) && + chain_length < saccharide_min_size_)) && + seqres.IsValid()) { seq::SequenceHandle s = seqres.FindSequence(chain.GetName()); if (s.IsValid()) chain_length = s.GetLength(); } - if (chain.IsPolymer() && chain_length >= min_polymer_size_) { + if ((chain.IsPolypeptide() && chain_length >= peptide_min_size_) || + (chain.IsPolynucleotide() && chain_length >= nucleicacid_min_size_) || + ((chain.IsOligosaccharide() || chain.IsPolysaccharide()) && + chain_length >= saccharide_min_size_)) { if (*curr_chain_name_ == 0) { throw std::runtime_error("running out of chain names"); } diff --git a/modules/mol/alg/src/pdbize.hh b/modules/mol/alg/src/pdbize.hh index 1ddf66fc5d047d882563b5e0fe7e61dc989f8c86..79ce6744d9baaed6b2ec4dd9d9ad5ee6215929bb 100644 --- a/modules/mol/alg/src/pdbize.hh +++ b/modules/mol/alg/src/pdbize.hh @@ -35,7 +35,19 @@ extern const char* WATER_CHAIN_NAME; class DLLEXPORT_OST_MOL_ALG PDBize { public: explicit PDBize(int min_polymer_size=10): - min_polymer_size_(min_polymer_size), ent_(mol::CreateEntity()), + peptide_min_size_(min_polymer_size), + nucleicacid_min_size_(min_polymer_size), + saccharide_min_size_(min_polymer_size), ent_(mol::CreateEntity()), + curr_chain_name_(POLYPEPTIDE_CHAIN_NAMES), needs_adjustment_(false), + last_rnum_(0) + {} + + explicit PDBize(int peptide_min_size, + int nucleicacid_min_size, + int saccharide_min_size): + peptide_min_size_(peptide_min_size), + nucleicacid_min_size_(nucleicacid_min_size), + saccharide_min_size_(saccharide_min_size), ent_(mol::CreateEntity()), curr_chain_name_(POLYPEPTIDE_CHAIN_NAMES), needs_adjustment_(false), last_rnum_(0) {} @@ -45,7 +57,9 @@ public: EntityHandle Finish(bool shift_to_fit=true); private: - int min_polymer_size_; + int peptide_min_size_; + int nucleicacid_min_size_; + int saccharide_min_size_; EntityHandle ent_; ChainHandle ligand_chain_; ChainHandle water_chain_; diff --git a/modules/mol/alg/tests/test_pdbize.py 
b/modules/mol/alg/tests/test_pdbize.py index 5a180b95267938eab7b1b2240ef3a5848fb02fbe..c9f67643a04877d3132de8d5f7a054830a389951 100644 --- a/modules/mol/alg/tests/test_pdbize.py +++ b/modules/mol/alg/tests/test_pdbize.py @@ -4,7 +4,6 @@ import os import random class TestPDBize(unittest.TestCase): - def test_numbers_water_molecules_with_ins_codes(self): m = mol.CreateEntity() e = m.EditXCS(mol.BUFFERED_EDIT) @@ -94,6 +93,187 @@ class TestPDBize(unittest.TestCase): self.assertEqual(residues[26].number.num, 2) self.assertEqual(residues[26].number.ins_code, '\0') + def _CheckMinSize(self, ost_ent, seq_list, chn_nm_lst, **kwargs): + """Check effects of the *_min_size parameter. + + :param ost_ent: OST entity to be PDBized. + :type ost_ent: :class:`~ost.mol.EntityHandle` + :param seq_list: Sequence list for the chains in ost_ent. + :type seq_list: :class:`~ost.seq.SequenceList` + :param chn_nm_lst: List of expected chain names in PDBized entity. + :type chn_nm_lst: :class:`list` of :class:`str` + """ + if "saccharide_min_size" not in kwargs: + kwargs["saccharide_min_size"] = 10 + if "nucleicacid_min_size" not in kwargs: + kwargs["nucleicacid_min_size"] = 10 + if "peptide_min_size" not in kwargs: + kwargs["peptide_min_size"] = 10 + transformations = geom.Mat4List() + transformations.append(geom.Mat4()) + pdbizer = mol.alg.PDBize(**kwargs) + pdbizer.Add(ost_ent.Select(''), transformations, seq_list) + pdbized = pdbizer.Finish() + self.assertEqual(len(pdbized.chains), len(chn_nm_lst)) + for i in range(0, len(chn_nm_lst)): + self.assertEqual(pdbized.chains[i].name, chn_nm_lst[i]) + return pdbized + + def test_peptide_min_size(self): + """Make sure the peptide_min_size parameter works, place a polypeptide in + chain '_'. 
+ """ + m = mol.CreateEntity() + e = m.EditXCS(mol.BUFFERED_EDIT) + c = e.InsertChain("A"); + e.SetChainType(c, mol.CHAINTYPE_POLY_PEPTIDE_L) + for i in range(10): + e.AppendResidue(c, "ALA") + seqs = seq.CreateSequenceList() + seqs.AddSequence(seq.CreateSequence("LotsOfAlanin", "AAAAAAAAAA")) + + # test that small peptide chains end up in the ligand chain "_" + self._CheckMinSize(m, seqs, ["_"], peptide_min_size=11) + + # test again with two small peptide chains + c = e.InsertChain("B"); + e.SetChainType(c, mol.CHAINTYPE_POLY_PEPTIDE_L) + for i in range(15): + e.AppendResidue(c, "ALA") + seqs.AddSequence(seq.CreateSequence("MoreAlanin", "AAAAAAAAAAAAAAA")) + self._CheckMinSize(m, seqs, ["_"], peptide_min_size=16) + + # test one peptide in ligand chain, second as polymer chain + self._CheckMinSize(m, seqs, ["_", "A"], peptide_min_size=11) + + # actually disabling min. polymer size + self._CheckMinSize(m, seqs, ["A", "B"], peptide_min_size=0) + + def test_nucleicacid_min_size(self): + """Make sure the nucleicacid_min_size parameter works, place a + polynucleotide in chain '_'. + """ + m = mol.CreateEntity() + e = m.EditXCS(mol.BUFFERED_EDIT) + c = e.InsertChain("A"); + e.SetChainType(c, mol.CHAINTYPE_POLY_DN) + for i in range(10): + e.AppendResidue(c, "DA") + seqs = seq.CreateSequenceList() + seqs.AddSequence(seq.CreateSequence("LotsOfAdenine", "AAAAAAAAAA")) + + # test that small nucleotide chains end up in the ligand chain "_" + self._CheckMinSize(m, seqs, ["_"], nucleicacid_min_size=11) + + # test again with two small nucleic acid chains + c = e.InsertChain("B"); + e.SetChainType(c, mol.CHAINTYPE_POLY_DN) + for i in range(15): + e.AppendResidue(c, "DA") + seqs.AddSequence(seq.CreateSequence("MoreAdenine", "AAAAAAAAAAAAAAA")) + self._CheckMinSize(m, seqs, ["_"], nucleicacid_min_size=16) + + # test one nucleic acid in ligand chain, second as polymer chain + self._CheckMinSize(m, seqs, ["_", "A"], nucleicacid_min_size=11) + + # actually disabling min. 
polymer size + self._CheckMinSize(m, seqs, ["A", "B"], nucleicacid_min_size=0) + + def test_saccharide_min_size(self): + """Make sure the saccharide_min_size parameter works, place an + oligosaccharide in chain '_'. + """ + m = mol.CreateEntity() + e = m.EditXCS(mol.BUFFERED_EDIT) + c = e.InsertChain("A"); + e.SetChainType(c, mol.CHAINTYPE_OLIGOSACCHARIDE) + for i in range(10): + e.AppendResidue(c, "NAG") + seqs = seq.CreateSequenceList() + + # test that small oligosaccharides end up in the ligand chain "_" + self._CheckMinSize(m, seqs, ["_"], saccharide_min_size=11) + + # test again with two small oligosaccharide chains + c = e.InsertChain("B"); + e.SetChainType(c, mol.CHAINTYPE_OLIGOSACCHARIDE) + for i in range(15): + e.AppendResidue(c, "NAG") + self._CheckMinSize(m, seqs, ["_"], saccharide_min_size=16) + + # test one oligosaccharide in ligand chain, second as polymer chain + self._CheckMinSize(m, seqs, ["_", "A"], saccharide_min_size=11) + + # actually disabling min. polymer size + + def test_peptide_nucleicacid_saccharide_min_sizes(self): + """Make sure that all three thresholds play well together. + """ + m = mol.CreateEntity() + e = m.EditXCS(mol.BUFFERED_EDIT) + c = e.InsertChain("A"); + e.SetChainType(c, mol.CHAINTYPE_POLY_PEPTIDE_L) + for i in range(10): + e.AppendResidue(c, "ALA") + seqs = seq.CreateSequenceList() + seqs.AddSequence(seq.CreateSequence("LotsOfAlanin", "AAAAAAAAAA")) + c = e.InsertChain("B"); + e.SetChainType(c, mol.CHAINTYPE_POLY_DN) + for i in range(10): + e.AppendResidue(c, "DA") + seqs.AddSequence(seq.CreateSequence("LotsOfAdenine", "AAAAAAAAAA")) + c = e.InsertChain("C"); + e.SetChainType(c, mol.CHAINTYPE_OLIGOSACCHARIDE) + for i in range(10): + e.AppendResidue(c, "NAG") + + # Check branched entities can be abandoned in the ligand chain while + # peptides and nucleic acids live in their own chains. 
+ pdbized = self._CheckMinSize(m, seqs, ["A", "B", "_"], + saccharide_min_size=11, + peptide_min_size=0, + nucleicacid_min_size=0) + self.assertTrue(pdbized.chains[0].IsPolypeptide()) + self.assertTrue(pdbized.chains[1].IsPolynucleotide()) + self.assertEqual(pdbized.chains[2].residues[0].GetStringProp("type"), + "oligosaccharide") + + # test to store a short polynucleotide and sugar in the ligand chain but keep + # longer polynucleotide and the peptide outside of the ligand chain. + c = e.InsertChain("D"); + e.SetChainType(c, mol.CHAINTYPE_POLY_DN) + for i in range(5): + e.AppendResidue(c, "DG") + seqs.AddSequence(seq.CreateSequence("LotsOfGuanine", "GGGGG")) + pdbized = self._CheckMinSize(m, seqs, ["A", "B", "_"], + saccharide_min_size=11, + peptide_min_size=0, + nucleicacid_min_size=6) + self.assertTrue(pdbized.chains[0].IsPolypeptide()) + self.assertTrue(pdbized.chains[1].IsPolynucleotide()) + self.assertEqual(pdbized.chains[2].residues[0].GetStringProp("type"), + "oligosaccharide") + self.assertEqual(pdbized.chains[2].residues[-1].GetStringProp("type"), + "polydeoxyribonucleotide") + + # test to add a small peptide to the ligand chain + c = e.InsertChain("E"); + e.SetChainType(c, mol.CHAINTYPE_POLY_PEPTIDE_L) + for i in range(5): + e.AppendResidue(c, "ALA") + seqs.AddSequence(seq.CreateSequence("SomeAlanin", "AAAAA")) + pdbized = self._CheckMinSize(m, seqs, ["A", "B", "_", "C"], + saccharide_min_size=11, + peptide_min_size=6, + nucleicacid_min_size=3) + self.assertTrue(pdbized.chains[0].IsPolypeptide()) + self.assertTrue(pdbized.chains[1].IsPolynucleotide()) + self.assertEqual(pdbized.chains[2].residues[0].GetStringProp("type"), + "oligosaccharide") + self.assertEqual(pdbized.chains[2].residues[-1].GetStringProp("type"), + "polypeptide(L)") + + if __name__ == "__main__": from ost import testutils testutils.RunTests()