Skip to content
Snippets Groups Projects
Commit 00a39f32 authored by B13nch3n's avatar B13nch3n
Browse files

SCHWED-4896: extend control in PDBize

parent e4709d79
No related branches found
Tags 2.1.0-rc3
No related merge requests found
......@@ -798,7 +798,7 @@ of the annotation available.
See :attr:`operationsintervalls`
.. function:: PDBize(asu, seqres=None, min_polymer_size=10, transformation=False)
.. function:: PDBize(asu, seqres=None, min_polymer_size=None, transformation=False, peptide_min_size=10, nucleicacid_min_size=10, saccharide_min_size=10)
Returns the biological assembly (bio unit) for an entity. The new entity
created is well suited to be saved as a PDB file. Therefore the function
......@@ -810,7 +810,8 @@ of the annotation available.
- Each polymer gets its own chain, named A-Z 0-9 a-z.
- The description of non-polymer chains will be put into a generic string
property called description on the residue level.
- Ligands that resemble a polymer but have less than *min_polymer_size*
- Ligands that resemble a polymer but have fewer than *min_polymer_size* /
*peptide_min_size* / *nucleicacid_min_size* / *saccharide_min_size*
residues are assigned the same numeric residue number. The residues are
distinguished by insertion code.
- Sometimes bio units exceed the coordinate system storable in a PDB file.
......@@ -830,11 +831,21 @@ of the annotation available.
:type seqres: :class:`~ost.seq.SequenceList`
:param min_polymer_size: The minimal number of residues a polymer needs to
get its own chain. Everything below that number will be sorted into the
ligand chain.
ligand chain. Overrides *peptide_min_size*, *nucleicacid_min_size* and
*saccharide_min_size* if set to a value other than None.
:type min_polymer_size: :class:`int`
:param transformation: If set, return the transformation matrix used to
move the bounding box of the bio unit to the lower left corner.
:type transformation: :class:`bool`
:param peptide_min_size: Minimal size to get an individual chain for a
polypeptide. Is overridden by *min_polymer_size*.
:type peptide_min_size: :class:`int`
:param nucleicacid_min_size: Minimal size to get an individual chain for a
polynucleotide. Is overridden by *min_polymer_size*.
:type nucleicacid_min_size: :class:`int`
:param saccharide_min_size: Minimal size to get an individual chain for an
oligosaccharide or polysaccharide. Is overridden by *min_polymer_size*.
:type saccharide_min_size: :class:`int`
.. class:: MMCifInfoStructDetails
......
......@@ -358,9 +358,15 @@ def LoadMMCIF(filename, fault_tolerant=None, calpha_only=None, profile='DEFAULT'
# argument is the usual 'self'.
# documentation for this function was moved to mmcif.rst,
# MMCifInfoBioUnit.PDBize, since this function is not included in SPHINX.
def _PDBize(biounit, asu, seqres=None, min_polymer_size=10,
transformation=False):
pdbizer = mol.alg.PDBize(min_polymer_size=min_polymer_size)
def _PDBize(biounit, asu, seqres=None, min_polymer_size=None,
transformation=False, peptide_min_size=10, nucleicacid_min_size=10,
saccharide_min_size=10):
if min_polymer_size is not None:
pdbizer = mol.alg.PDBize(min_polymer_size=min_polymer_size)
else:
pdbizer = mol.alg.PDBize(peptide_min_size=peptide_min_size,
nucleicacid_min_size=nucleicacid_min_size,
saccharide_min_size=saccharide_min_size)
chains = biounit.GetChainList()
c_intvls = biounit.GetChainIntervalList()
......
......@@ -439,7 +439,10 @@ BOOST_PYTHON_MODULE(_ost_mol_alg)
class_<mol::alg::PDBize>("PDBize",
init<int>(arg("min_polymer_size")=10))
init<int,int,int>((arg("peptide_min_size"),
arg("nucleicacid_min_size"),
arg("saccharide_min_size"))))
.def(init<int>(arg("min_polymer_size")=10))
.def("Add", &mol::alg::PDBize::Add,
(arg("asu"), arg("transformations"), arg("seqres")))
.def("Finish", &mol::alg::PDBize::Finish, arg("shift_to_fit")=true)
......
......@@ -76,12 +76,19 @@ void PDBize::Add(EntityView asu, const geom::Mat4List& transforms,
e2 =asu.GetChainList().end(); j != e2; ++j) {
ChainView chain = *j;
int chain_length = chain.GetResidueCount();
if (chain_length < min_polymer_size_ && seqres.IsValid()) {
if (((chain.IsPolypeptide() && chain_length < peptide_min_size_) ||
(chain.IsPolynucleotide() && chain_length < nucleicacid_min_size_) ||
((chain.IsOligosaccharide() || chain.IsPolysaccharide()) &&
chain_length < saccharide_min_size_)) &&
seqres.IsValid()) {
seq::SequenceHandle s = seqres.FindSequence(chain.GetName());
if (s.IsValid())
chain_length = s.GetLength();
}
if (chain.IsPolymer() && chain_length >= min_polymer_size_) {
if ((chain.IsPolypeptide() && chain_length >= peptide_min_size_) ||
(chain.IsPolynucleotide() && chain_length >= nucleicacid_min_size_) ||
((chain.IsOligosaccharide() || chain.IsPolysaccharide()) &&
chain_length >= saccharide_min_size_)) {
if (*curr_chain_name_ == 0) {
throw std::runtime_error("running out of chain names");
}
......
......@@ -35,7 +35,19 @@ extern const char* WATER_CHAIN_NAME;
class DLLEXPORT_OST_MOL_ALG PDBize {
public:
explicit PDBize(int min_polymer_size=10):
min_polymer_size_(min_polymer_size), ent_(mol::CreateEntity()),
peptide_min_size_(min_polymer_size),
nucleicacid_min_size_(min_polymer_size),
saccharide_min_size_(min_polymer_size), ent_(mol::CreateEntity()),
curr_chain_name_(POLYPEPTIDE_CHAIN_NAMES), needs_adjustment_(false),
last_rnum_(0)
{}
explicit PDBize(int peptide_min_size,
int nucleicacid_min_size,
int saccharide_min_size):
peptide_min_size_(peptide_min_size),
nucleicacid_min_size_(nucleicacid_min_size),
saccharide_min_size_(saccharide_min_size), ent_(mol::CreateEntity()),
curr_chain_name_(POLYPEPTIDE_CHAIN_NAMES), needs_adjustment_(false),
last_rnum_(0)
{}
......@@ -45,7 +57,9 @@ public:
EntityHandle Finish(bool shift_to_fit=true);
private:
int min_polymer_size_;
int peptide_min_size_;
int nucleicacid_min_size_;
int saccharide_min_size_;
EntityHandle ent_;
ChainHandle ligand_chain_;
ChainHandle water_chain_;
......
......@@ -4,7 +4,6 @@ import os
import random
class TestPDBize(unittest.TestCase):
def test_numbers_water_molecules_with_ins_codes(self):
m = mol.CreateEntity()
e = m.EditXCS(mol.BUFFERED_EDIT)
......@@ -94,6 +93,187 @@ class TestPDBize(unittest.TestCase):
self.assertEqual(residues[26].number.num, 2)
self.assertEqual(residues[26].number.ins_code, '\0')
def _CheckMinSize(self, ost_ent, seq_list, chn_nm_lst, **kwargs):
  """PDBize an entity and verify the names of the resulting chains.

  Any of the three ``*_min_size`` thresholds missing from *kwargs* is set to
  10 before the keyword arguments are handed to ``mol.alg.PDBize``.

  :param ost_ent: OST entity to be PDBized.
  :type ost_ent: :class:`~ost.mol.EntityHandle`
  :param seq_list: Sequence list for the chains in ost_ent.
  :type seq_list: :class:`~ost.seq.SequenceList`
  :param chn_nm_lst: Chain names expected in the PDBized entity, in order.
  :type chn_nm_lst: :class:`list` of :class:`str`
  :param kwargs: Keyword arguments forwarded to the PDBize constructor.
  :returns: The entity returned by ``Finish()``.
  """
  for threshold in ("saccharide_min_size", "nucleicacid_min_size",
                    "peptide_min_size"):
    kwargs.setdefault(threshold, 10)
  # a single default-constructed matrix is the only transformation passed on
  transforms = geom.Mat4List()
  transforms.append(geom.Mat4())
  pdbizer = mol.alg.PDBize(**kwargs)
  pdbizer.Add(ost_ent.Select(''), transforms, seq_list)
  pdbized = pdbizer.Finish()
  self.assertEqual(len(pdbized.chains), len(chn_nm_lst))
  for idx, expected_name in enumerate(chn_nm_lst):
    self.assertEqual(pdbized.chains[idx].name, expected_name)
  return pdbized
def test_peptide_min_size(self):
  """Exercise peptide_min_size: peptide chains shorter than the threshold are
  expected in the ligand chain '_'.
  """
  ent = mol.CreateEntity()
  editor = ent.EditXCS(mol.BUFFERED_EDIT)
  chain = editor.InsertChain("A")
  editor.SetChainType(chain, mol.CHAINTYPE_POLY_PEPTIDE_L)
  for _ in range(10):
    editor.AppendResidue(chain, "ALA")
  seqres = seq.CreateSequenceList()
  seqres.AddSequence(seq.CreateSequence("LotsOfAlanin", "AAAAAAAAAA"))
  # 10 residues against a threshold of 11: only the ligand chain is expected
  self._CheckMinSize(ent, seqres, ["_"], peptide_min_size=11)
  # add a second, 15 residue peptide; a threshold of 16 is above both chains
  chain = editor.InsertChain("B")
  editor.SetChainType(chain, mol.CHAINTYPE_POLY_PEPTIDE_L)
  for _ in range(15):
    editor.AppendResidue(chain, "ALA")
  seqres.AddSequence(seq.CreateSequence("MoreAlanin", "AAAAAAAAAAAAAAA"))
  self._CheckMinSize(ent, seqres, ["_"], peptide_min_size=16)
  # threshold between both lengths: one ligand chain plus one polymer chain
  self._CheckMinSize(ent, seqres, ["_", "A"], peptide_min_size=11)
  # a threshold of 0 gives every peptide its own chain
  self._CheckMinSize(ent, seqres, ["A", "B"], peptide_min_size=0)
def test_nucleicacid_min_size(self):
  """Exercise nucleicacid_min_size: polynucleotide chains shorter than the
  threshold are expected in the ligand chain '_'.
  """
  ent = mol.CreateEntity()
  editor = ent.EditXCS(mol.BUFFERED_EDIT)
  chain = editor.InsertChain("A")
  editor.SetChainType(chain, mol.CHAINTYPE_POLY_DN)
  for _ in range(10):
    editor.AppendResidue(chain, "DA")
  seqres = seq.CreateSequenceList()
  seqres.AddSequence(seq.CreateSequence("LotsOfAdenine", "AAAAAAAAAA"))
  # 10 residues against a threshold of 11: only the ligand chain is expected
  self._CheckMinSize(ent, seqres, ["_"], nucleicacid_min_size=11)
  # add a second, 15 residue chain; a threshold of 16 is above both chains
  chain = editor.InsertChain("B")
  editor.SetChainType(chain, mol.CHAINTYPE_POLY_DN)
  for _ in range(15):
    editor.AppendResidue(chain, "DA")
  seqres.AddSequence(seq.CreateSequence("MoreAdenine", "AAAAAAAAAAAAAAA"))
  self._CheckMinSize(ent, seqres, ["_"], nucleicacid_min_size=16)
  # threshold between both lengths: one ligand chain plus one polymer chain
  self._CheckMinSize(ent, seqres, ["_", "A"], nucleicacid_min_size=11)
  # a threshold of 0 gives every polynucleotide its own chain
  self._CheckMinSize(ent, seqres, ["A", "B"], nucleicacid_min_size=0)
def test_saccharide_min_size(self):
  """Make sure the saccharide_min_size parameter works, place an
  oligosaccharide in chain '_'.

  No sequences are registered for the saccharide chains, so the sequence
  list handed to the check helper stays empty throughout.
  """
  m = mol.CreateEntity()
  e = m.EditXCS(mol.BUFFERED_EDIT)
  c = e.InsertChain("A")
  e.SetChainType(c, mol.CHAINTYPE_OLIGOSACCHARIDE)
  for _ in range(10):
    e.AppendResidue(c, "NAG")
  seqs = seq.CreateSequenceList()
  # test that small oligosaccharides end up in the ligand chain "_"
  self._CheckMinSize(m, seqs, ["_"], saccharide_min_size=11)
  # test again with two small oligosaccharide chains
  c = e.InsertChain("B")
  e.SetChainType(c, mol.CHAINTYPE_OLIGOSACCHARIDE)
  for _ in range(15):
    e.AppendResidue(c, "NAG")
  self._CheckMinSize(m, seqs, ["_"], saccharide_min_size=16)
  # test one oligosaccharide in ligand chain, second as polymer chain
  self._CheckMinSize(m, seqs, ["_", "A"], saccharide_min_size=11)
  # actually disabling min. polymer size; this check was announced by the
  # comment but missing, unlike in the peptide and nucleic acid tests
  self._CheckMinSize(m, seqs, ["A", "B"], saccharide_min_size=0)
def test_peptide_nucleicacid_saccharide_min_sizes(self):
  """Make sure that all three thresholds play well together.
  """
  ent = mol.CreateEntity()
  editor = ent.EditXCS(mol.BUFFERED_EDIT)

  def add_chain(chain_name, chain_type, residue_name, residue_count):
    # insert a chain of the given type and fill it with identical residues
    new_chain = editor.InsertChain(chain_name)
    editor.SetChainType(new_chain, chain_type)
    for _ in range(residue_count):
      editor.AppendResidue(new_chain, residue_name)

  def ligand_type(entity, residue_index):
    # "type" string property of a residue in the third chain, which the
    # expected chain name lists below name "_"
    return entity.chains[2].residues[residue_index].GetStringProp("type")

  seqres = seq.CreateSequenceList()
  add_chain("A", mol.CHAINTYPE_POLY_PEPTIDE_L, "ALA", 10)
  seqres.AddSequence(seq.CreateSequence("LotsOfAlanin", "AAAAAAAAAA"))
  add_chain("B", mol.CHAINTYPE_POLY_DN, "DA", 10)
  seqres.AddSequence(seq.CreateSequence("LotsOfAdenine", "AAAAAAAAAA"))
  add_chain("C", mol.CHAINTYPE_OLIGOSACCHARIDE, "NAG", 10)

  # The saccharide is expected in the ligand chain while the peptide and the
  # nucleic acid keep chains of their own.
  pdbized = self._CheckMinSize(ent, seqres, ["A", "B", "_"],
                               peptide_min_size=0,
                               nucleicacid_min_size=0,
                               saccharide_min_size=11)
  self.assertTrue(pdbized.chains[0].IsPolypeptide())
  self.assertTrue(pdbized.chains[1].IsPolynucleotide())
  self.assertEqual(ligand_type(pdbized, 0), "oligosaccharide")

  # A short polynucleotide joins the sugar in the ligand chain; the longer
  # polynucleotide and the peptide stay outside of it.
  add_chain("D", mol.CHAINTYPE_POLY_DN, "DG", 5)
  seqres.AddSequence(seq.CreateSequence("LotsOfGuanine", "GGGGG"))
  pdbized = self._CheckMinSize(ent, seqres, ["A", "B", "_"],
                               peptide_min_size=0,
                               nucleicacid_min_size=6,
                               saccharide_min_size=11)
  self.assertTrue(pdbized.chains[0].IsPolypeptide())
  self.assertTrue(pdbized.chains[1].IsPolynucleotide())
  self.assertEqual(ligand_type(pdbized, 0), "oligosaccharide")
  self.assertEqual(ligand_type(pdbized, -1), "polydeoxyribonucleotide")

  # A small peptide is added and expected at the end of the ligand chain.
  add_chain("E", mol.CHAINTYPE_POLY_PEPTIDE_L, "ALA", 5)
  seqres.AddSequence(seq.CreateSequence("SomeAlanin", "AAAAA"))
  pdbized = self._CheckMinSize(ent, seqres, ["A", "B", "_", "C"],
                               peptide_min_size=6,
                               nucleicacid_min_size=3,
                               saccharide_min_size=11)
  self.assertTrue(pdbized.chains[0].IsPolypeptide())
  self.assertTrue(pdbized.chains[1].IsPolynucleotide())
  self.assertEqual(ligand_type(pdbized, 0), "oligosaccharide")
  self.assertEqual(ligand_type(pdbized, -1), "polypeptide(L)")
# Run the test cases of this module when it is executed as a script.
if __name__ == "__main__":
  from ost.testutils import RunTests
  RunTests()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment