diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst index 8a17a05675c0bd87c99a74a9ed618d01ab8db1bb..23d6708541cecfd168a0e500e2d2f923d7cf27fb 100644 --- a/modules/mol/alg/doc/molalg.rst +++ b/modules/mol/alg/doc/molalg.rst @@ -134,18 +134,6 @@ Local Distance Test scores (lDDT, DRMSD) :rtype: :class:`str` - - -:mod:`qsscoring <ost.mol.alg.qsscoring>` -- Quaternary Structure (QS) scores --------------------------------------------------------------------------------- - -.. automodule:: ost.mol.alg.qsscoring - :members: - :synopsis: Scoring of quaternary structures - -.. currentmodule:: ost.mol.alg - - :mod:`scoring <ost.mol.alg.scoring>` -- Specialized scoring functions -------------------------------------------------------------------------------- @@ -166,6 +154,23 @@ Local Distance Test scores (lDDT, DRMSD) .. currentmodule:: ost.mol.alg +:mod:`qsscoring <ost.mol.alg.qsscore>` -- QS score implementation +-------------------------------------------------------------------------------- + +.. warning:: + + The code that comes with + `Bertoni et al. <https://www.nature.com/articles/s41598-017-09654-8>`_ is + considered deprecated. QS score has been re-implemented to be tightly + integrated with the chain mapping algorithms. The old code is still available + and documented :doc:`here <qsscoring_deprecated>`. + +.. automodule:: ost.mol.alg.qsscore + :members: + :member-order: bysource + :synopsis: QS Score implementation + +.. currentmodule:: ost.mol.alg .. _steric-clashes: diff --git a/modules/mol/alg/doc/qsscoring_deprecated.rst b/modules/mol/alg/doc/qsscoring_deprecated.rst new file mode 100644 index 0000000000000000000000000000000000000000..425c62b248ad905a99f76021b9f4708d5eb8e09a --- /dev/null +++ b/modules/mol/alg/doc/qsscoring_deprecated.rst @@ -0,0 +1,13 @@ +:orphan: + +Quaternary Structure (QS) scores (deprecated) +================================================================================ + +:mod:`qsscoring <ost.mol.alg.qsscoring>` -- Quaternary Structure (QS) scores +-------------------------------------------------------------------------------- + +.. automodule:: ost.mol.alg.qsscoring + :members: + :synopsis: Scoring of quaternary structures + +.. currentmodule:: ost.mol.alg diff --git a/modules/mol/alg/pymod/CMakeLists.txt b/modules/mol/alg/pymod/CMakeLists.txt index c54ce0583a0e3e0d9307aa07349982b827d0820c..cd3a79f1017022bce9a86fa15cd901d9a51b71e1 100644 --- a/modules/mol/alg/pymod/CMakeLists.txt +++ b/modules/mol/alg/pymod/CMakeLists.txt @@ -23,6 +23,7 @@ set(OST_MOL_ALG_PYMOD_MODULES helix_kinks.py hbond.py lddt.py + qsscore.py scoring.py chain_mapping.py ) diff --git a/modules/mol/alg/pymod/chain_mapping.py b/modules/mol/alg/pymod/chain_mapping.py index 7739af52a0f5c7868f78fb21289eaaf95cdb791b..1d64e874804a65605843693f3adf1055b93ed388 100644 --- a/modules/mol/alg/pymod/chain_mapping.py +++ b/modules/mol/alg/pymod/chain_mapping.py @@ -30,6 +30,7 @@ class MappingResult: self._model = model self._chem_groups = chem_groups self._mapping = mapping + self._alns = alns @property def target(self): @@ -60,7 +61,6 @@ class MappingResult: """ return self._chem_groups - @property def mapping(self): """ Mapping of :attr:`model` chains onto :attr:`~target` @@ -86,7 +86,7 @@ class MappingResult: :type: :class:`dict` with key: :class:`tuple` of :class:`str`, value: :class:`ost.seq.AlignmentHandle` """ - return self._aln + return self._alns class ReprResult: @@ -1081,11 +1081,18 @@ def _ProcessStructure(ent, min_pep_length, min_nuc_length): returned view, sequences have :class:`ost.mol.EntityView` of according chains attached 3) same for polynucleotide chains """ - query = "peptide=true or nucleotide=true and aname=\"CA\",\"C3'\"" - sel = ent.Select(query) view = ent.CreateEmptyView() + query = "(peptide=true or nucleotide=true) and aname=\"CA\",\"CB\",\"C3'\"" + sel = ent.Select(query) for r in sel.residues: - view.AddResidue(r.handle, mol.INCLUDE_ALL) + if r.chem_class.IsNucleotideLinking(): + view.AddResidue(r.handle, mol.INCLUDE_ALL) + elif r.chem_class.IsPeptideLinking(): + if len(r.atoms) == 2: + view.AddResidue(r.handle, mol.INCLUDE_ALL) + elif r.name == "GLY": + if len(r.atoms) == 1 and r.atoms[0].name == "CA": + view.AddResidue(r.handle, mol.INCLUDE_ALL) polypep_seqs = seq.CreateSequenceList() polynuc_seqs = seq.CreateSequenceList() @@ -1501,9 +1508,9 @@ def _CheckOneToOneMapping(ref_chains, mdl_chains): one_to_one = list() for ref, mdl in zip(ref_chains, mdl_chains): if len(ref) == 1 and len(mdl) == 1: - one_to_one.append(mdl[0]) + one_to_one.append(mdl) elif len(ref) == 1 and len(mdl) == 0: - one_to_one.append(None) + one_to_one.append([None]) else: only_one_to_one = False break diff --git a/modules/mol/alg/pymod/qsscore.py b/modules/mol/alg/pymod/qsscore.py new file mode 100644 index 0000000000000000000000000000000000000000..54c4a3d5a9b47d0fe00982fb5aeb60689953c253 --- /dev/null +++ b/modules/mol/alg/pymod/qsscore.py @@ -0,0 +1,473 @@ +import itertools +import numpy as np +from scipy.spatial import distance + +import time +from ost import mol + +class QSEntity: + """ Helper object for QS-score computation + + Holds structural information and getters for interacting chains, i.e. + interfaces. Peptide residues are represented by their CB position + (CA for GLY) and nucleotides by C3'. + + :param ent: Structure for QS score computation + :type ent: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle` + :param contact_d: Pairwise distance of residues to be considered as contacts + :type contact_d: :class:`float` + """ + def __init__(self, ent, contact_d = 12.0): + pep_query = "(peptide=true and (aname=\"CB\" or (rname=\"GLY\" and aname=\"CA\")))" + nuc_query = "(nucleotide=True and aname=\"C3'\")" + self._view = ent.Select(" or ".join([pep_query, nuc_query])) + self._contact_d = contact_d + + # the following attributes will be lazily evaluated + self._chain_names = None + self._interacting_chains = None + self._sequence = dict() + self._pos = dict() + self._pair_dist = dict() + + @property + def view(self): + """ Processed structure + + View that only contains representative atoms. That's CB for peptide + residues (CA for GLY) and C3' for nucleotides. + + :type: :class:`ost.mol.EntityView` + """ + return self._view + + @property + def contact_d(self): + """ Pairwise distance of residues to be considered as contacts + + Given at :class:`QSEntity` construction + + :type: :class:`float` + """ + return self._contact_d + + @property + def chain_names(self): + """ Chain names in :attr:`~view` + + Names are sorted + + :type: :class:`list` of :class:`str` + """ + if self._chain_names is None: + self._chain_names = sorted([ch.name for ch in self.view.chains]) + return self._chain_names + + @property + def interacting_chains(self): + """ Pairs of chains in :attr:`~view` with at least one contact + + :type: :class:`list` of :class:`tuples` + """ + if self._interacting_chains is None: + self._interacting_chains = list() + for x in itertools.combinations(self.chain_names, 2): + if np.count_nonzero(self.PairDist(x[0], x[1]) < self.contact_d): + self._interacting_chains.append(x) + return self._interacting_chains + + def GetChain(self, chain_name): + """ Get chain by name + + :param chain_name: Chain in :attr:`~view` + :type chain_name: :class:`str` + """ + chain = self.view.FindChain(chain_name) + if not chain.IsValid(): + raise RuntimeError(f"view has no chain named \"{chain_name}\"") + return chain + + def GetSequence(self, chain_name): + """ Get sequence of chain + + Returns sequence of specified chain as raw :class:`str` + + :param chain_name: Chain in :attr:`~view` + :type chain_name: :class:`str` + """ + if chain_name not in self._sequence: + ch = self.GetChain(chain_name) + s = ''.join([r.one_letter_code for r in ch.residues]) + self._sequence[chain_name] = s + return self._sequence[chain_name] + + def GetPos(self, chain_name): + """ Get representative positions of chain + + That's CB positions for peptide residues (CA for GLY) and C3' for + nucleotides. Returns positions as a numpy array of shape + (n_residues, 3). + + :param chain_name: Chain in :attr:`~view` + :type chain_name: :class:`str` + """ + if chain_name not in self._pos: + ch = self.GetChain(chain_name) + pos = np.zeros((len(ch.residues), 3)) + for i, r in enumerate(ch.residues): + pos[i,:] = r.atoms[0].GetPos().data + self._pos[chain_name] = pos + return self._pos[chain_name] + + def PairDist(self, chain_name_one, chain_name_two): + """ Get pairwise distances between two chains + + Returns distances as numpy array of shape (a, b). + Where a is the number of residues of the chain that comes BEFORE the + other in :attr:`~chain_names` + """ + key = (min(chain_name_one, chain_name_two), + max(chain_name_one, chain_name_two)) + if key not in self._pair_dist: + self._pair_dist[key] = distance.cdist(self.GetPos(key[0]), + self.GetPos(key[1]), + 'euclidean') + return self._pair_dist[key] + +class QSScorer: + """ Helper object to compute QS-score + + It's conceptually tightly integrated into the mechanisms from the + chain_mapping module. The prefered way to derive an object of type + :class:`QSScorer` is through the static constructor: + :func:`~FromMappingResult`. Example score computation including mapping: + + :: + + from ost.mol.alg.qsscore import QSScorer + from ost.mol.alg.chain_mapping import ChainMapper + + ent_1 = io.LoadPDB("path_to_assembly_1.pdb") + ent_2 = io.LoadPDB("path_to_assembly_2.pdb") + + chain_mapper = ChainMapper(ent_1) + mapping_result = chain_mapper.GetlDDTMapping(ent_2) + qs_scorer = QSScorer.FromMappingResult(mapping_result) + print("score:", qs_scorer.GetQSScore(mapping_result.mapping)) + + + Formulas for QS scores: + + :: + + - QS_global = weighted_scores / (weight_sum + weight_extra_all) + -> weighted_scores = sum(w(min(d1,d2)) * (1 - abs(d1-d2)/12)) for shared + -> weight_sum = sum(w(min(d1,d2))) for shared + -> weight_extra_all = sum(w(d)) for all non-shared + -> w(d) = 1 if d <= 5, exp(-2 * ((d-5.0)/4.28)^2) else + + In the formulas above: + + * "d": CA/CB-CA/CB distance of an "inter-chain contact" ("d1", "d2" for + "shared" contacts). + * "mapped": we could map chains of two structures and align residues in + :attr:`alignments`. + * "shared": pairs of residues which are "mapped" and have + "inter-chain contact" in both structures. + * "inter-chain contact": CB-CB pairs (CA for GLY) with distance <= 12 A + (fallback to CA-CA if :attr:`calpha_only` is True). + * "w(d)": weighting function (prob. of 2 res. to interact given CB distance) + from `Xu et al. 2009 <https://dx.doi.org/10.1016%2Fj.jmb.2008.06.002>`_. + + The actual QS-score computation in :func:`QSScorer.GetQSScore` implements + caching. Repeated computations with alternative chain mappings thus become + faster. + + :param target: Structure designated as "target". Can be fetched from + :class:`ost.mol.alg.chain_mapping.MappingResult` + :type target: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle` + :param chem_groups: Groups of chemically equivalent chains in *target*. + Can be fetched from + :class:`ost.mol.alg.chain_mapping.MappingResult` + :type chem_groups: :class:`list` of :class:`list` of :class:`str` + :param model: Structure designated as "model". Can be fetched from + :class:`ost.mol.alg.chain_mapping.MappingResult` + :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle` + :param alns: Each alignment is accessible with ``alns[(t_chain,m_chain)]``. + First sequence is the sequence of the respective chain in + :attr:`~qsent1`, second sequence the one from :attr:`~qsent2`. + Can be fetched from + :class:`ost.mol.alg.chain_mapping.MappingResult` + :type alns: :class:`dict` with key: :class:`tuple` of :class:`str`, value: + :class:`ost.seq.AlignmentHandle` + """ + def __init__(self, target, chem_groups, model, alns, contact_d = 12.0): + + self._qsent1 = QSEntity(target, contact_d = contact_d) + + # ensure that target chain names match the ones in chem_groups + chem_group_ch_names = list(itertools.chain.from_iterable(chem_groups)) + if self._qsent1.chain_names != sorted(chem_group_ch_names): + raise RuntimeError(f"Expect exact same chain names in chem_groups " + f"and in target (which is processed to only " + f"contain peptides/nucleotides). target: " + f"{self._qsent1.chain_names}, chem_groups: " + f"{chem_group_ch_names}") + + self._chem_groups = chem_groups + self._qsent2 = QSEntity(model, contact_d = contact_d) + self._alns = alns + + # cache for mapped interface scores + # key: tuple of tuple ((qsent1_ch1, qsent1_ch2), + # ((qsent2_ch1, qsent2_ch2)) + # value: tuple with two numbers referring to QS-score formalism + # (equation 6 in Bertoni et al., 2017): + # 1: contribution of that interface to nominator + # 2: contribution of that interface to denominator + self._mapped_cache = dict() + + # cache for non-mapped interfaces in qsent1 + # key: tuple (qsent1_ch1, qsent1_ch2) + # value: contribution of that interface to second term in denominator in + # equation 6 in Bertoni et al., 2017. + # (sum of non-shared penalty of qsent1) + self._qsent_1_penalties = dict() + + # same for qsent2 + self._qsent_2_penalties = dict() + + @staticmethod + def FromMappingResult(mapping_result): + """ The preferred way to get a :class:`QSScorer` + + Static constructor that derives an object of type :class:`QSScorer` + using a :class:`ost.mol.alg.chain_mapping.MappingResult` + + :param mapping_result: Data source + :type mapping_result: :class:`ost.mol.alg.chain_mapping.MappingResult` + """ + qs_scorer = QSScorer(mapping_result.target, mapping_result.chem_groups, + mapping_result.model, alns = mapping_result.alns) + return qs_scorer + + @property + def qsent1(self): + """ Represents *target* + + :type: :class:`QSEntity` + """ + return self._qsent1 + + @property + def chem_groups(self): + """ Groups of chemically equivalent chains in *target* + + Provided at object construction + + :type: :class:`list` of :class:`list` of :class:`str` + """ + return self._chem_groups + + @property + def qsent2(self): + """ Represents *model* + + :type: :class:`QSEntity` + """ + return self._qsent2 + + @property + def alns(self): + """ Alignments between chains in :attr:`~qsent1` and :attr:`~qsent2` + + Provided at object construction. Each alignment is accessible with + ``alns[(t_chain,m_chain)]``. First sequence is the sequence of the + respective chain in :attr:`~qsent1`, second sequence the one from + :attr:`~qsent2`. + + :type: :class:`dict` with key: :class:`tuple` of :class:`str`, value: + :class:`ost.seq.AlignmentHandle` + """ + return self._alns + + def GetQSScore(self, mapping, check=True): + """ Computes QS-score given chain mapping + + Again, the preferred way is to get this chain mapping is from an object + of type :class:`ost.mol.alg.chain_mapping.MappingResult`. + + :param mapping: see + :attr:`ost.mol.alg.chain_mapping.MappingResult.mapping` + :type mapping: :class:`list` of :class:`list` of :class:`str` + :param check: Perform input checks, can be disabled for speed purposes + if you know what you're doing. + :type check: :class:`bool` + :returns: The QS-Score + """ + + if check: + # ensure that dimensionality of mapping matches self.chem_groups + if len(self.chem_groups) != len(mapping): + raise RuntimeError("Dimensions of self.chem_groups and mapping " + "must match") + for a,b in zip(self.chem_groups, mapping): + if len(a) != len(b): + raise RuntimeError("Dimensions of self.chem_groups and " + "mapping must match") + # ensure that chain names in mapping are all present in qsent2 + for name in itertools.chain.from_iterable(mapping): + if name is not None and name not in self.qsent2.chain_names: + raise RuntimeError(f"Each chain in mapping must be present " + f"in self.qsent2. No match for " + f"\"{name}\"") + + flat_mapping = dict() + for a, b in zip(self.chem_groups, mapping): + flat_mapping.update({x: y for x, y in zip(a, b) if y is not None}) + + # refers to equation 6 in Bertoni et al., 2017 + nominator = 0.0 + denominator= 0.0 + + # keep track of processed interfaces in qsent2 + processed_qsent2_interfaces = set() + + for int1 in self.qsent1.interacting_chains: + if int1[0] in flat_mapping and int1[1] in flat_mapping: + int2 = (flat_mapping[int1[0]], flat_mapping[int1[1]]) + a,b = self._MappedInterfaceScores(int1, int2) + nominator += a + denominator += b + processed_qsent2_interfaces.add((min(int2[0], int2[1]), + max(int2[0], int2[1]))) + else: + denominator += self._InterfacePenalty1(int1) + + # add penalties for non-mapped interfaces in qsent2 + for int2 in self.qsent2.interacting_chains: + if int2 not in processed_qsent2_interfaces: + denominator += self._InterfacePenalty2(int2) + + if denominator > 0.0: + return nominator / denominator + else: + return 0.0 + + def _MappedInterfaceScores(self, int1, int2): + if (int1, int2) not in self._mapped_cache: + self._mapped_cache[(int1, int2)] = self._InterfaceScores(int1, int2) + return self._mapped_cache[(int1, int2)] + + def _InterfaceScores(self, int1, int2): + + d1 = self.qsent1.PairDist(int1[0], int1[1]) + d2 = self.qsent2.PairDist(int2[0], int2[1]) + + # given two chain names a and b: if a < b, shape of pairwise distances is + # (len(a), len(b)). However, if b > a, its (len(b), len(a)) => transpose + if int2[0] > int2[1]: + d2 = d2.transpose() + + # should be given by design => no need to transpose d1 + assert(int1[0] < int1[1]) + + # indices of the first chain in the two interfaces + mapped_indices_1_1, mapped_indices_1_2 = \ + self._IndexMapping(int1[0], int2[0]) + # indices of the second chain in the two interfaces + mapped_indices_2_1, mapped_indices_2_2 = \ + self._IndexMapping(int1[1], int2[1]) + + # get shared_masks - for this we first need to select the actual + # mapped positions to get a one-to-one relationshop and map it back + # to the original mask size + mapped_idx_grid_1 = np.ix_(mapped_indices_1_1, mapped_indices_2_1) + mapped_idx_grid_2 = np.ix_(mapped_indices_1_2, mapped_indices_2_2) + mapped_d1 = d1[mapped_idx_grid_1] + mapped_d2 = d2[mapped_idx_grid_2] + assert(mapped_d1.shape == mapped_d2.shape) + assert(self.qsent1.contact_d == self.qsent2.contact_d) + contact_d = self.qsent1.contact_d + shared_mask = np.logical_and(mapped_d1 < contact_d, + mapped_d2 < contact_d) + shared_mask_d1 = np.full(d1.shape, False, dtype=bool) + shared_mask_d1[mapped_idx_grid_1] = shared_mask + shared_mask_d2 = np.full(d2.shape, False, dtype=bool) + shared_mask_d2[mapped_idx_grid_2] = shared_mask + + # nominator and denominator contributions from shared contacts + shared_d1 = d1[shared_mask_d1] + shared_d2 = d2[shared_mask_d2] + shared_min = np.minimum(shared_d1, shared_d2) + shared_abs_diff_div_12 = np.abs(np.subtract(shared_d1, shared_d2))/12.0 + weight_term = np.ones(shared_min.shape[0]) + bigger_5_mask = shared_min > 5.0 + weights = np.exp(-2.0*np.square((shared_min[bigger_5_mask]-5.0)/4.28)) + weight_term[bigger_5_mask] = weights + diff_term = np.subtract(np.ones(weight_term.shape[0]), + shared_abs_diff_div_12) + nominator = np.sum(np.multiply(weight_term, diff_term)) + denominator = np.sum(weight_term) + + # denominator contribution from non shared contacts in interface one + contact_distances = d1[np.logical_and(np.logical_not(shared_mask_d1), + d1 < contact_d)] + bigger_5 = contact_distances[contact_distances > 5] + denominator += np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28))) + # add 1.0 for all contact distances <= 5.0 + denominator += contact_distances.shape[0] - bigger_5.shape[0] + + # same for interface two + contact_distances = d2[np.logical_and(np.logical_not(shared_mask_d2), + d2 < contact_d)] + bigger_5 = contact_distances[contact_distances > 5] + denominator += np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28))) + # add 1.0 for all contact distances <= 5.0 + denominator += contact_distances.shape[0] - bigger_5.shape[0] + + return (nominator, denominator) + + def _IndexMapping(self, ch1, ch2): + """ Fetches aln and returns indices of (non-)aligned residues + + returns 2 numpy arrays containing the indices of residues in + ch1 and ch2 which are aligned + """ + mapped_indices_1 = list() + mapped_indices_2 = list() + idx_1 = 0 + idx_2 = 0 + for col in self.alns[(ch1, ch2)]: + if col[0] != '-' and col[1] != '-': + mapped_indices_1.append(idx_1) + mapped_indices_2.append(idx_2) + if col[0] != '-': + idx_1 +=1 + if col[1] != '-': + idx_2 +=1 + return (np.array(mapped_indices_1), np.array(mapped_indices_2)) + + def _InterfacePenalty1(self, interface): + if interface not in self._qsent_1_penalties: + self._qsent_1_penalties[interface] = \ + self._InterfacePenalty(self.qsent1, interface) + return self._qsent_1_penalties[interface] + + def _InterfacePenalty2(self, interface): + if interface not in self._qsent_2_penalties: + self._qsent_2_penalties[interface] = \ + self._InterfacePenalty(self.qsent2, interface) + return self._qsent_2_penalties[interface] + + def _InterfacePenalty(self, qsent, interface): + d = qsent.PairDist(interface[0], interface[1]) + contact_distances = d[d < qsent.contact_d] + bigger_5 = contact_distances[contact_distances > 5] + penalty = np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28))) + # add 1.0 for all contact distances <= 5.0 + penalty += contact_distances.shape[0] - bigger_5.shape[0] + return penalty + +# specify public interface +__all__ = ('QSEntity', 'QSScorer') diff --git a/modules/mol/alg/tests/CMakeLists.txt b/modules/mol/alg/tests/CMakeLists.txt index 3672735bed274fd219c73eb42c8665c7a570f3b7..a567a16573f07da7d2b5331cbdea531e36783084 100644 --- a/modules/mol/alg/tests/CMakeLists.txt +++ b/modules/mol/alg/tests/CMakeLists.txt @@ -9,6 +9,7 @@ set(OST_MOL_ALG_UNIT_TESTS test_accessibility.py test_sec_struct.py test_lddt.py + test_qsscore.py ) if (COMPOUND_LIB) diff --git a/modules/mol/alg/tests/test_qsscore.py b/modules/mol/alg/tests/test_qsscore.py new file mode 100644 index 0000000000000000000000000000000000000000..61d27b5c42abd1f1e61231749638249f0c07e5a2 --- /dev/null +++ b/modules/mol/alg/tests/test_qsscore.py @@ -0,0 +1,175 @@ +import unittest, os, sys +import ost +from ost import conop +from ost import io, mol, seq, settings +# check if we can import: fails if numpy or scipy not available +try: + import numpy as np + from ost.mol.alg.qsscore import * + from ost.mol.alg.chain_mapping import * +except ImportError: + print("Failed to import qsscore.py. Happens when numpy or scipy "\ + "missing. Ignoring qsscore.py tests.") + sys.exit(0) + +def _LoadFile(file_name): + """Helper to avoid repeating input path over and over.""" + return io.LoadPDB(os.path.join('testfiles', file_name)) + +class TestQSScore(unittest.TestCase): + + def test_qsentity(self): + ent = _LoadFile("3l1p.1.pdb") + qsent = QSEntity(ent) + self.assertEqual(len(qsent.view.chains), 4) + self.assertEqual(qsent.GetChain("A").GetName(), "A") + self.assertEqual(qsent.GetChain("B").GetName(), "B") + self.assertEqual(qsent.GetChain("C").GetName(), "C") + self.assertEqual(qsent.GetChain("D").GetName(), "D") + self.assertRaises(Exception, qsent.GetChain, "E") + self.assertEqual(qsent.chain_names, ["A", "B", "C", "D"]) + self.assertEqual(qsent.GetSequence("A"), "DMKALQKELEQFAKLLKQKRITLGYTQADVGLTLGVLFGKVFSQTTISRFEALQLSLKNMSKLRPLLEKWVEEADNNENLQEISKSVQARKRKRTSIENRVRWSLETMFLKSPKPSLQQITHIANQLGLEKDVVRVWFSNRRQKGKR") + self.assertEqual(qsent.GetSequence("B"), "KALQKELEQFAKLLKQKRITLGYTQADVGLTLGVLFGKVFSQTTISRFEALQLSLKNMSKLRPLLEKWVEEADNNENLQEISKSQARKRKRTSIENRVRWSLETMFLKSPKPSLQQITHIANQLGLEKDVVRVWFSNRRQKGKRS") + self.assertEqual(qsent.GetSequence("C"), "TCCACATTTGAAAGGCAAATGGA") + self.assertEqual(qsent.GetSequence("D"), "ATCCATTTGCCTTTCAAATGTGG") + + # check for a couple of positions with manually extracted values + + # GLU + pos = qsent.GetPos("B") + self.assertAlmostEqual(pos[5,0], -1.661, places=3) + self.assertAlmostEqual(pos[5,1], 27.553, places=3) + self.assertAlmostEqual(pos[5,2], 12.774, places=3) + + # GLY + pos = qsent.GetPos("A") + self.assertAlmostEqual(pos[23,0], 17.563, places=3) + self.assertAlmostEqual(pos[23,1], -4.082, places=3) + self.assertAlmostEqual(pos[23,2], 29.005, places=3) + + # Cytosine + pos = qsent.GetPos("C") + self.assertAlmostEqual(pos[4,0], 14.796, places=3) + self.assertAlmostEqual(pos[4,1], 24.653, places=3) + self.assertAlmostEqual(pos[4,2], 59.318, places=3) + + + # check pairwise dist, chain names are always sorted => + # A is rows, C is cols + dist_one = qsent.PairDist("A", "C") + dist_two = qsent.PairDist("C", "A") + self.assertTrue(np.array_equal(dist_one, dist_two)) + self.assertEqual(dist_one.shape[0], len(qsent.GetSequence("A"))) + self.assertEqual(dist_one.shape[1], len(qsent.GetSequence("C"))) + + # check some random distance between the Gly and Cytosine that we already + # checked above + self.assertAlmostEqual(dist_one[23,4], 41.86, places=2) + + # all chains interact with each other... but hey, check nevertheless + self.assertEqual(qsent.interacting_chains, [("A", "B"), ("A", "C"), + ("A", "D"), ("B", "C"), + ("B", "D"), ("C", "D")]) + + def test_qsscorer(self): + + target = _LoadFile("3l1p.1.pdb") + model = _LoadFile("3l1p.1_model.pdb") + + # we need to derive a chain mapping prior to scoring + mapper = ChainMapper(target) + res = mapper.GetRigidMapping(model, strategy="greedy_iterative_rmsd") + qs_scorer = QSScorer.FromMappingResult(res) + qs_score = qs_scorer.GetQSScore(res.mapping) + self.assertAlmostEqual(qs_score, 0.472, places=2) + + def test_hetero_case_1(self): + # additional chains + ent_1 = _LoadFile('4ux8.1.pdb') # A2 B2 C2, symmetry: C2 + ent_2 = _LoadFile('3fub.2.pdb') # A2 B2 , symmetry: C2 + mapper = ChainMapper(ent_1) + res = mapper.GetRigidMapping(ent_2, strategy="greedy_iterative_rmsd") + qs_scorer = QSScorer.FromMappingResult(res) + qs_score = qs_scorer.GetQSScore(res.mapping) + self.assertAlmostEqual(qs_score, 0.825, 2) + + def test_HeteroCase1b(self): + # as above but with assymetric unit of 3fub + # -> no overlap found: check if extensive search can deal with it + ent_1 = _LoadFile('4ux8.1.pdb') + ent_2 = _LoadFile('3fub.au.pdb') + mapper = ChainMapper(ent_1) + res = mapper.GetRigidMapping(ent_2, strategy="greedy_iterative_rmsd") + qs_scorer = QSScorer.FromMappingResult(res) + qs_score = qs_scorer.GetQSScore(res.mapping) + self.assertAlmostEqual(qs_score, 0.356, 2) + + def test_hetero_case_2(self): + # different stoichiometry + ent_1 = _LoadFile('1efu.1.pdb') # A2 B2, symmetry: C2 + ent_2 = _LoadFile('4pc6.1.pdb') # A B , no symmetry + mapper = ChainMapper(ent_1) + res = mapper.GetRigidMapping(ent_2, strategy="greedy_iterative_rmsd") + qs_scorer = QSScorer.FromMappingResult(res) + qs_score = qs_scorer.GetQSScore(res.mapping) + self.assertAlmostEqual(qs_score, 0.3131, 2) + + def test_hetero_case_3(self): + # more chains + ent_1 = _LoadFile('2vjt.1.pdb') # A6 B6, symmetry: D3 + ent_2 = _LoadFile('3dbj.1.pdb') # A3 B3, symmetry: C3 + mapper = ChainMapper(ent_1) + res = mapper.GetRigidMapping(ent_2, strategy="greedy_iterative_rmsd") + qs_scorer = QSScorer.FromMappingResult(res) + qs_score = qs_scorer.GetQSScore(res.mapping) + self.assertAlmostEqual(qs_score, 0.359, 2) + + def test_hetero_case_4(self): + # inverted chains + ent_1 = _LoadFile('3ia3.1.pdb') # AB, no symmetry + ent_2 = _LoadFile('3ia3.2.pdb') # BA, no symmetry + mapper = ChainMapper(ent_1) + res = mapper.GetRigidMapping(ent_2, strategy="greedy_iterative_rmsd") + qs_scorer = QSScorer.FromMappingResult(res) + qs_score = qs_scorer.GetQSScore(res.mapping) + self.assertAlmostEqual(qs_score, 0.980, 2) + + def test_hetero_model(self): + # uncomplete model missing 2 third of the contacts + target = _LoadFile('1eud_ref.pdb') # AB, no symmetry + model = _LoadFile('1eud_mdl_partial-dimer.pdb') # BA, no symmetry + mapper = ChainMapper(target) + res = mapper.GetRigidMapping(model, strategy="greedy_iterative_rmsd") + qs_scorer = QSScorer.FromMappingResult(res) + qs_score = qs_scorer.GetQSScore(res.mapping) + self.assertAlmostEqual(qs_score, 0.323, 2) + + def test_homo_1(self): + # different stoichiometry SOD + ent_1 = _LoadFile('4dvh.1.pdb') # A2, symmetry: C2 + ent_2 = _LoadFile('4br6.1.pdb') # A4, symmetry: D2 + # original qsscoring uses other default values for gap_open and gap_extension + # penalties, let's use those to reproduce the old results as the alignments + # would differ otherwise + mapper = ChainMapper(ent_1, pep_gap_open = -5, pep_gap_ext = -2) + res = mapper.GetRigidMapping(ent_2, strategy="greedy_iterative_rmsd") + qs_scorer = QSScorer.FromMappingResult(res) + qs_score = qs_scorer.GetQSScore(res.mapping) + self.assertAlmostEqual(qs_score, 0.147, 2) + + def test_homo_2(self): + # broken cyclic symmetry + ent_1 = _LoadFile('4r7y.1.pdb') # A6, symmetry: C6 + ent_2 = ent_1.Select('cname=A,B') # A2, no symmetry + mapper = ChainMapper(ent_1) + res = mapper.GetRigidMapping(ent_2, strategy="greedy_iterative_rmsd") + qs_scorer = QSScorer.FromMappingResult(res) + qs_score = qs_scorer.GetQSScore(res.mapping) + self.assertAlmostEqual(qs_score, 1/6, 2) + +if __name__ == "__main__": + from ost import testutils + if testutils.SetDefaultCompoundLib(): + testutils.RunTests() + else: + print('No compound lib available. Ignoring test_qsscore.py tests.')