diff --git a/modules/mol/alg/pymod/chain_mapping.py b/modules/mol/alg/pymod/chain_mapping.py index f5e10d58c80624c56635169228f69cf557309919..932e00332bfd3956f806acd0a7be026def4e2b37 100644 --- a/modules/mol/alg/pymod/chain_mapping.py +++ b/modules/mol/alg/pymod/chain_mapping.py @@ -19,6 +19,14 @@ from ost import geom from ost.mol.alg import lddt from ost.mol.alg import qsscore +def _CSel(ent, cnames): + """ Returns view with specified chains + + Ensures that quotation marks are around chain names to not confuse + OST query language with weird special characters. + """ + query = "cname=" + ','.join([mol.QueryQuoteName(cname) for cname in cnames]) + return ent.Select(query) class MappingResult: """ Result object for the chain mapping functions in :class:`ChainMapper` @@ -383,7 +391,9 @@ class ReprResult: chain_rnums[chname].append(str(rnum)) chain_queries = list() for k,v in chain_rnums.items(): - chain_queries.append(f"(cname={k} and rnum={','.join(v)})") + q = f"(cname={mol.QueryQuoteName(k)} and " + q += f"rnum={','.join(v)})" + chain_queries.append(q) self._ost_query = " or ".join(chain_queries) return self._ost_query @@ -840,8 +850,8 @@ class ChainMapper: for ref_ch, mdl_ch in zip(ref_group, mdl_group): if ref_ch is not None and mdl_ch is not None: aln = ref_mdl_alns[(ref_ch, mdl_ch)] - aln.AttachView(0, self.target.Select(f"cname={ref_ch}")) - aln.AttachView(1, mdl.Select(f"cname={mdl_ch}")) + aln.AttachView(0, _CSel(self.target, [ref_ch])) + aln.AttachView(1, _CSel(mdl, [mdl_ch])) alns[(ref_ch, mdl_ch)] = aln return MappingResult(self.target, mdl, self.chem_groups, one_to_one, alns) @@ -876,8 +886,8 @@ class ChainMapper: for ref_ch, mdl_ch in zip(ref_group, mdl_group): if ref_ch is not None and mdl_ch is not None: aln = ref_mdl_alns[(ref_ch, mdl_ch)] - aln.AttachView(0, self.target.Select(f"cname={ref_ch}")) - aln.AttachView(1, mdl.Select(f"cname={mdl_ch}")) + aln.AttachView(0, _CSel(self.target, [ref_ch])) + aln.AttachView(1, _CSel(mdl, [mdl_ch])) alns[(ref_ch, mdl_ch)] = aln return MappingResult(self.target, mdl, self.chem_groups, mapping, @@ -957,8 +967,8 @@ class ChainMapper: for ref_ch, mdl_ch in zip(ref_group, mdl_group): if ref_ch is not None and mdl_ch is not None: aln = ref_mdl_alns[(ref_ch, mdl_ch)] - aln.AttachView(0, self.target.Select(f"cname={ref_ch}")) - aln.AttachView(1, mdl.Select(f"cname={mdl_ch}")) + aln.AttachView(0, _CSel(self.target, [ref_ch])) + aln.AttachView(1, _CSel(mdl, [mdl_ch])) alns[(ref_ch, mdl_ch)] = aln return MappingResult(self.target, mdl, self.chem_groups, one_to_one, alns) @@ -993,8 +1003,8 @@ class ChainMapper: for ref_ch, mdl_ch in zip(ref_group, mdl_group): if ref_ch is not None and mdl_ch is not None: aln = ref_mdl_alns[(ref_ch, mdl_ch)] - aln.AttachView(0, self.target.Select(f"cname={ref_ch}")) - aln.AttachView(1, mdl.Select(f"cname={mdl_ch}")) + aln.AttachView(0, _CSel(self.target, [ref_ch])) + aln.AttachView(1, _CSel(mdl, [mdl_ch])) alns[(ref_ch, mdl_ch)] = aln return MappingResult(self.target, mdl, self.chem_groups, mapping, @@ -1095,8 +1105,8 @@ class ChainMapper: for ref_ch, mdl_ch in zip(ref_group, mdl_group): if ref_ch is not None and mdl_ch is not None: aln = ref_mdl_alns[(ref_ch, mdl_ch)] - aln.AttachView(0, self.target.Select(f"cname={ref_ch}")) - aln.AttachView(1, mdl.Select(f"cname={mdl_ch}")) + aln.AttachView(0, _CSel(self.target, [ref_ch])) + aln.AttachView(1, _CSel(mdl, [mdl_ch])) alns[(ref_ch, mdl_ch)] = aln return MappingResult(self.target, mdl, self.chem_groups, one_to_one, alns) @@ -1174,8 +1184,8 @@ class ChainMapper: for ref_ch, mdl_ch in zip(ref_group, mdl_group): if ref_ch is not None and mdl_ch is not None: aln = ref_mdl_alns[(ref_ch, mdl_ch)] - aln.AttachView(0, self.target.Select(f"cname={ref_ch}")) - aln.AttachView(1, mdl.Select(f"cname={mdl_ch}")) + aln.AttachView(0, _CSel(self.target, [ref_ch])) + aln.AttachView(1, _CSel(mdl, [mdl_ch])) alns[(ref_ch, mdl_ch)] = aln return MappingResult(self.target, mdl, self.chem_groups, final_mapping, @@ -1301,7 +1311,7 @@ class ChainMapper: substructure_ref_mdl_alns = dict() mdl_views = dict() for ch in mdl.chains: - mdl_views[ch.GetName()] = mdl.Select(f"cname={ch.GetName()}") + mdl_views[ch.GetName()] = _CSel(mdl, [ch.GetName()]) for chem_group, mapping in zip(substructure_chem_groups, substructure_chem_mapping): for ref_ch in chem_group: @@ -1316,7 +1326,7 @@ class ChainMapper: idx_in_seq = ref_seq.GetPos(idx) tmp[idx_in_seq] = ref_seq[idx_in_seq] ref_seq = seq.CreateSequence(ref_ch, ''.join(tmp)) - ref_seq.AttachView(substructure.Select(f"cname={ref_ch}")) + ref_seq.AttachView(_CSel(substructure, [ref_ch])) mdl_seq = full_aln.GetSequence(1) mdl_seq = seq.CreateSequence(mdl_seq.GetName(), mdl_seq.GetString()) @@ -1491,7 +1501,7 @@ class ChainMapper: s = ''.join([r.one_letter_code for r in ch.residues]) s = seq.CreateSequence(ch.GetName(), s) - s.AttachView(view.Select(f"cname={ch.GetName()}")) + s.AttachView(_CSel(view, [ch.GetName()])) if n_pep == n_res: polypep_seqs.AddSequence(s) elif n_nuc == n_res: @@ -1509,7 +1519,7 @@ class ChainMapper: # select for chains for which we actually extracted the sequence chain_names = [s.GetAttachedView().chains[0].name for s in polypep_seqs] chain_names += [s.GetAttachedView().chains[0].name for s in polynuc_seqs] - view = view.Select(f"cname={','.join(chain_names)}") + view = _CSel(view, chain_names) return (view, polypep_seqs, polynuc_seqs) @@ -1976,14 +1986,15 @@ class _lDDTDecomposer: def _SetupScorer(self): for ch in self.ref.chains: # Select everything close to that chain - query = f"{self.inclusion_radius} <> [cname={ch.GetName()}] " - query += f"and cname!={ch.GetName()}" + query = f"{self.inclusion_radius} <> " + query += f"[cname={mol.QueryQuoteName(ch.GetName())}] " + query += f"and cname!={mol.QueryQuoteName(ch.GetName())}" for close_ch in self.ref.Select(query).chains: k1 = (ch.GetName(), close_ch.GetName()) k2 = (close_ch.GetName(), ch.GetName()) if k1 not in self.interface_scorer and \ k2 not in self.interface_scorer: - dimer_ref = self.ref.Select(f"cname={k1[0]},{k1[1]}") + dimer_ref = _CSel(self.ref, [k1[0], k1[1]]) s = lddt.lDDTScorer(dimer_ref, bb_only=True) self.interface_scorer[k1] = s self.interface_scorer[k2] = s @@ -2005,7 +2016,7 @@ class _lDDTDecomposer: # add any missing single chain scorer for ch in self.ref.chains: if ch.GetName() not in self.single_chain_scorer: - single_chain_ref = self.ref.Select(f"cname={ch.GetName()}") + single_chain_ref = _CSel(self.ref, [ch.GetName()]) self.single_chain_scorer[ch.GetName()] = \ lddt.lDDTScorer(single_chain_ref, bb_only = True) self.n += self.single_chain_scorer[ch.GetName()].n_distances @@ -2044,7 +2055,7 @@ class _lDDTDecomposer: if not (ref_ch, mdl_ch) in self.single_chain_cache: alns = dict() alns[mdl_ch] = self.ref_mdl_alns[(ref_ch, mdl_ch)] - mdl_sel = self.mdl.Select(f"cname={mdl_ch}") + mdl_sel = _CSel(self.mdl, [mdl_ch]) s = self.single_chain_scorer[ref_ch] _,_,_,conserved,_,_,_ = s.lDDT(mdl_sel, residue_mapping=alns, @@ -2062,7 +2073,7 @@ class _lDDTDecomposer: alns = dict() alns[mdl_ch1] = self.ref_mdl_alns[(ref_ch1, mdl_ch1)] alns[mdl_ch2] = self.ref_mdl_alns[(ref_ch2, mdl_ch2)] - mdl_sel = self.mdl.Select(f"cname={mdl_ch1},{mdl_ch2}") + mdl_sel = _CSel(self.mdl, [mdl_ch1, mdl_ch2]) s = self.interface_scorer[(ref_ch1, ref_ch2)] _,_,_,conserved,_,_,_ = s.lDDT(mdl_sel, residue_mapping=alns, @@ -2110,7 +2121,8 @@ class _lDDTGreedySearcher(_lDDTDecomposer): for ch in self.mdl.chains: ch_name = ch.GetName() self.mdl_neighbors[ch_name] = set() - query = f"{d} <> [cname={ch_name}] and cname !={ch_name}" + query = f"{d} <> [cname={mol.QueryQuoteName(ch_name)}]" + query += f" and cname !={mol.QueryQuoteName(ch_name)}" for close_ch in self.mdl.Select(query).chains: self.mdl_neighbors[ch_name].add(close_ch.GetName()) @@ -2531,7 +2543,7 @@ class _QSScoreGreedySearcher(qsscore.QSScorer): self.single_chain_scorer = dict() self.single_chain_cache = dict() for ch in self.ref.chains: - single_chain_ref = self.ref.Select(f"cname={ch.GetName()}") + single_chain_ref = _CSel(self.ref, [ch.GetName()]) self.single_chain_scorer[ch.GetName()] = \ lddt.lDDTScorer(single_chain_ref, bb_only = True) @@ -2539,7 +2551,7 @@ class _QSScoreGreedySearcher(qsscore.QSScorer): if not (ref_ch, mdl_ch) in self.single_chain_cache: alns = dict() alns[mdl_ch] = self.ref_mdl_alns[(ref_ch, mdl_ch)] - mdl_sel = self.mdl.Select(f"cname={mdl_ch}") + mdl_sel = _CSel(self.mdl, [mdl_ch]) s = self.single_chain_scorer[ref_ch] _,_,_,conserved,_,_,_ = s.lDDT(mdl_sel, residue_mapping=alns, @@ -3309,7 +3321,7 @@ def _ExtractMSAPos(msa, s_idx, indices, view): Indices refers to column indices in msa! """ s = msa.GetSequence(s_idx) - s_v = view.Select(f"cname={s.GetName()}") + s_v = _CSel(view, [s.GetName()]) # sanity check assert(len(s.GetGaplessString()) == len(s_v.residues)) diff --git a/modules/mol/alg/pymod/lddt.py b/modules/mol/alg/pymod/lddt.py index 92f41ec4cc97e79bf69df6b63131b1ba913edd6d..0f464d02bd28a330473191472c44275df9cc0263 100644 --- a/modules/mol/alg/pymod/lddt.py +++ b/modules/mol/alg/pymod/lddt.py @@ -682,8 +682,8 @@ class lDDTScorer: ch_name = chain.GetName() if ch_name not in chain_mapping: sm = self.symmetry_settings - dummy_scorer = lDDTScorer(model.Select("cname="+ch_name), - self.compound_lib, + mdl_sel = model.Select(f"cname={mol.QueryQuoteName(ch_name)}") + dummy_scorer = lDDTScorer(mdl_sel, self.compound_lib, symmetry_settings = sm, inclusion_radius = self.inclusion_radius, bb_only = self.bb_only)