diff --git a/modules/mol/alg/pymod/chain_mapping.py b/modules/mol/alg/pymod/chain_mapping.py index 7189b3e9dfe3a3387715849332d5e7d6801a4c40..1d36613400011c6565aa7bb10545067d0adbed7f 100644 --- a/modules/mol/alg/pymod/chain_mapping.py +++ b/modules/mol/alg/pymod/chain_mapping.py @@ -1389,6 +1389,27 @@ class ChainMapper: return (view, polypep_seqs, polynuc_seqs) + def Align(self, s1, s2, stype): + """ Access to internal sequence alignment functionality + + Alignment parameterization is setup at ChainMapper construction + + :param s1: First sequence to align - must have view attached in case + of resnum_alignments + :type s1: :class:`ost.seq.SequenceHandle` + :param s2: Second sequence to align - must have view attached in case + of resnum_alignments + :type s2: :class:`ost.seq.SequenceHandle` + :param stype: Type of sequences to align, must be in + [:class:`ost.mol.ChemType.AMINOACIDS`, + :class:`ost.mol.ChemType.NUCLEOTIDES`] + :returns: Pairwise alignment of s1 and s2 + """ + if stype not in [mol.ChemType.AMINOACIDS, mol.ChemType.NUCLEOTIDES]: + raise RuntimeError("stype must be ost.mol.ChemType.AMINOACIDS or " + "ost.mol.ChemType.NUCLEOTIDES") + return self.aligner.Align(s1, s2, chem_type = stype) + # INTERNAL HELPERS ################## @@ -1726,9 +1747,22 @@ def _GetRefMdlAlns(ref_chem_groups, ref_chem_group_msas, mdl_chem_groups, # seq2: seq of ref chain # seq3: seq of mdl chain # => we need the alignment between seq2 and seq3 - aln = seq.CreateAlignment(merged_aln.GetSequence(1), - merged_aln.GetSequence(2)) - ref_mdl_alns[(ref_ch, mdl_ch)] = aln + s2 = merged_aln.GetSequence(1) + s3 = merged_aln.GetSequence(2) + # cut leading and trailing gap columns + a = 0 # number of leading gap columns + for idx in range(len(s2)): + if s2[idx] != '-' or s3[idx] != '-': + break + a += 1 + b = 0 # number of trailing gap columns + for idx in reversed(range(len(s2))): + if s2[idx] != '-' or s3[idx] != '-': + break + b += 1 + s2 = seq.CreateSequence(s2.GetName(), s2[a: len(s2)-b]) + s3 = seq.CreateSequence(s3.GetName(), s3[a: len(s3)-b]) + ref_mdl_alns[(ref_ch, mdl_ch)] = seq.CreateAlignment(s2, s3) return ref_mdl_alns diff --git a/modules/mol/alg/tests/test_chain_mapping.py b/modules/mol/alg/tests/test_chain_mapping.py index 0a663081ce80dbafb9b9978e13450138baf83bec..106f849a3dba71843f166a8354d33e22380a3daf 100644 --- a/modules/mol/alg/tests/test_chain_mapping.py +++ b/modules/mol/alg/tests/test_chain_mapping.py @@ -296,6 +296,57 @@ class TestChainMapper(unittest.TestCase): self.assertEqual(greedy_rigid_res.chem_groups[0][1], flat_map['Y']) self.assertEqual(greedy_rigid_res.chem_groups[2][0], flat_map['Z']) + # test Align function of ChainMapper + _, mdl_polypep_seqs, mdl_polynuc_seqs = mapper.ProcessStructure(mdl) + for x, ref_aln in greedy_rigid_res.alns.items(): + ref_ch = x[0] + mdl_ch = x[1] + + # search for that sequence in ref sequences as extracted from the mapper + ref_s = None + ref_s_type = None + for s in mapper.polypep_seqs: + if s.GetName() == ref_ch: + ref_s = s + ref_s_type = ost.mol.ChemType.AMINOACIDS + break + if ref_s is None: + for s in mapper.polynuc_seqs: + if s.GetName() == ref_ch: + ref_s = s + ref_s_type = ost.mol.ChemType.NUCLEOTIDES + break + + # search for that sequence in mdl sequences as extracted from the + # ProcessStructure call before + mdl_s = None + mdl_s_type = None + for s in mdl_polypep_seqs: + if s.GetName() == mdl_ch: + mdl_s = s + mdl_s_type = ost.mol.ChemType.AMINOACIDS + break + if mdl_s is None: + for s in mdl_polynuc_seqs: + if s.GetName() == mdl_ch: + mdl_s = s + mdl_s_type = ost.mol.ChemType.NUCLEOTIDES + break + + self.assertTrue(ref_s is not None) + self.assertTrue(mdl_s is not None) + self.assertEqual(ref_s_type, mdl_s_type) + + aln = mapper.Align(ref_s, mdl_s, ref_s_type) + self.assertEqual(ref_aln.GetSequence(0).GetName(), + aln.GetSequence(0).GetName()) + self.assertEqual(ref_aln.GetSequence(1).GetName(), + aln.GetSequence(1).GetName()) + self.assertEqual(ref_aln.GetSequence(0).GetString(), + aln.GetSequence(0).GetString()) + self.assertEqual(ref_aln.GetSequence(1).GetString(), + aln.GetSequence(1).GetString()) + if __name__ == "__main__": from ost import testutils