From be69cfb5fb4ad0efed9a2c93be4c091a265513b2 Mon Sep 17 00:00:00 2001 From: Gerardo Tauriello <gerardo.tauriello@unibas.ch> Date: Fri, 25 May 2018 20:06:53 +0200 Subject: [PATCH] Deal with chain names with spaces or only space. --- modules/mol/alg/pymod/qsscoring.py | 60 +++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/modules/mol/alg/pymod/qsscoring.py b/modules/mol/alg/pymod/qsscoring.py index 919b5a72f..0f4edcc53 100644 --- a/modules/mol/alg/pymod/qsscoring.py +++ b/modules/mol/alg/pymod/qsscoring.py @@ -1240,6 +1240,28 @@ def _AlignAtomSeqs(seq_1, seq_2): LogWarning('%s: %s' % (seq_2.name, seq_2.string)) return aln +def _FixSelectChainName(ch_name): + """ + :return: String to be used with Select(cname=<RETURN>). Takes care of putting + quotation marks where needed. + :rtype: :class:`str` + :param ch_name: Single chain name (:class:`str`). + """ + if ch_name in ['-', '_', ' ']: + return '"%c"' % ch_name + else: + return ch_name + +def _FixSelectChainNames(ch_names): + """ + :return: String to be used with Select(cname=<RETURN>). Takes care of joining + and putting quotation marks where needed. + :rtype: :class:`str` + :param ch_names: Some iterable list of chain names (:class:`str` items). + """ + chain_set = set([_FixSelectChainName(ch_name) for ch_name in ch_names]) + return ','.join(chain_set) + # QS entity def _CleanInputEntity(ent): @@ -1263,13 +1285,7 @@ def _CleanInputEntity(ent): # remove them from *ent* if removed_chains: - chain_set = set() - for ch_name in removed_chains: - if ch_name in ['-', '_', ' ']: - chain_set.add('"%c"' % ch_name) - else: - chain_set.add(ch_name) - view = ent.Select('cname!=%s' % ','.join(chain_set)) + view = ent.Select('cname!=%s' % _FixSelectChainNames(removed_chains)) ent_new = mol.CreateEntityFromView(view, True) ent_new.SetName(ent.GetName()) else: @@ -1476,6 +1492,13 @@ def _GetAlignedResidues(qs_ent_1, qs_ent_2, chem_mapping, max_ca_per_chain, :param chem_mapping: See :attr:`QSscorer.chem_mapping` :param max_ca_per_chain: See :attr:`QSscorer.max_ca_per_chain_for_cm` """ + # make sure name doesn't contain spaces and is unique + def _FixName(seq_name, seq_names): + # get rid of spaces and make it unique + seq_name = seq_name.replace(' ', '-') + while seq_name in seq_names: + seq_name += '-' + return seq_name # resulting views into CA entities using CA chain sequences ent_view_1 = qs_ent_1.ca_entity.CreateEmptyView() ent_view_2 = qs_ent_2.ca_entity.CreateEmptyView() @@ -1489,12 +1512,12 @@ def _GetAlignedResidues(qs_ent_1, qs_ent_2, chem_mapping, max_ca_per_chain, seq_to_empty_view = dict() for ch in group_1: sequence = ca_chains_1[ch].Copy() - sequence.name = qs_ent_1.GetName() + '.' + ch + sequence.name = _FixName(qs_ent_1.GetName() + '.' + ch, seq_to_empty_view) seq_to_empty_view[sequence.name] = ent_view_1 seq_list.AddSequence(sequence) for ch in group_2: sequence = ca_chains_2[ch].Copy() - sequence.name = qs_ent_2.GetName() + '.' + ch + sequence.name = _FixName(qs_ent_2.GetName() + '.' + ch, seq_to_empty_view) seq_to_empty_view[sequence.name] = ent_view_2 seq_list.AddSequence(sequence) alnc = ClustalW(seq_list, clustalw=clustalw_bin) @@ -1564,8 +1587,8 @@ def _FindSymmetry(qs_ent_1, qs_ent_2, ent_to_cm_1, ent_to_cm_2, chem_mapping): for _, symm_1, symm_2 in sorted(best_symm): s1 = symm_1[0] s2 = symm_2[0] - group_1 = ent_to_cm_1.Select('cname=%s' % ','.join(s1)) - group_2 = ent_to_cm_2.Select('cname=%s' % ','.join(s2)) + group_1 = ent_to_cm_1.Select('cname=%s' % _FixSelectChainNames(s1)) + group_2 = ent_to_cm_2.Select('cname=%s' % _FixSelectChainNames(s2)) # check if by superposing a pair of chains within the symmetry group to # superpose all chains within the symmetry group # -> if successful, the symmetry groups are compatible @@ -1944,7 +1967,8 @@ def _GetClosestChainInterface(ent, ref_chain, chains): # inaccurate. Also it could be extracted from QSscoreEntity.contacts. closest = [] for ch in chains: - iface_view = ent.Select('cname=%s and 10 <> [cname=%s]' % (ref_chain, ch)) + iface_view = ent.Select('cname="%s" and 10 <> [cname="%s"]' \ + % (ref_chain, ch)) nr_res = iface_view.residue_count closest.append((nr_res, ch)) closest_chain = max(closest)[1] @@ -2156,8 +2180,8 @@ def _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2, chem_mapping, # to superpose the full oligomer (e.g. if some chains are open/closed) for c1, c2 in itertools.product(g1, g2): # get superposition transformation - chain_1 = ent_1.Select('cname=%s' % c1) - chain_2 = ent_2.Select('cname=%s' % c2) + chain_1 = ent_1.Select('cname="%s"' % c1) + chain_2 = ent_2.Select('cname="%s"' % c2) res = mol.alg.SuperposeSVD(chain_1, chain_2, apply_transform=False) # look for overlaps mapping = _GetSuperpositionMapping(ent_1, ent_2, chem_mapping, @@ -2261,8 +2285,8 @@ def _GetMappedRMSD(ent_1, ent_2, chain_mapping, transformation): atoms = [] for c1, c2 in chain_mapping.iteritems(): # get views and atom counts - chain_1 = ent_1.Select('cname=%s' % c1) - chain_2 = ent_2.Select('cname=%s' % c2) + chain_1 = ent_1.Select('cname="%s"' % c1) + chain_2 = ent_2.Select('cname="%s"' % c2) atom_count = chain_1.atom_count if atom_count != chain_2.atom_count: raise RuntimeError('Chains in _GetMappedRMSD must be perfectly aligned!') @@ -2294,13 +2318,13 @@ class _CachedRMSD: def GetChainView1(self, cname): """Get cached view on chain *cname* for :attr:`ent_1`.""" if cname not in self._chain_views_1: - self._chain_views_1[cname] = self.ent_1.Select('cname=%s' % cname) + self._chain_views_1[cname] = self.ent_1.Select('cname="%s"' % cname) return self._chain_views_1[cname] def GetChainView2(self, cname): """Get cached view on chain *cname* for :attr:`ent_2`.""" if cname not in self._chain_views_2: - self._chain_views_2[cname] = self.ent_2.Select('cname=%s' % cname) + self._chain_views_2[cname] = self.ent_2.Select('cname="%s"' % cname) return self._chain_views_2[cname] def GetSuperposition(self, c1, c2): -- GitLab