Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
O
openstructure
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container registry
Model registry
Analyze
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
openstructure
Commits
a103f388
Commit
a103f388
authored
2 years ago
by
Studer Gabriel
Browse files
Options
Downloads
Patches
Plain Diff
lDDT: give more fine grained control over alignment similarity measures
parent
28b6d2d3
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
modules/mol/alg/pymod/chain_mapping.py
+65
-33
65 additions, 33 deletions
modules/mol/alg/pymod/chain_mapping.py
with
65 additions
and
33 deletions
modules/mol/alg/pymod/chain_mapping.py
+
65
−
33
View file @
a103f388
...
...
@@ -16,10 +16,12 @@ from ost.mol.alg import lddt
class
ChainMapper
:
def
__init__
(
self
,
target
,
resnum_alignments
=
False
,
pep_seqid_thr
=
95.
,
pep_gap_thr
=
0.1
,
nuc_seqid_thr
=
95.
,
nuc_gap_thr
=
0.1
,
pep_subst_mat
=
seq
.
alg
.
BLOSUM100
,
pep_gap_open
=
-
5
,
pep_gap_ext
=
-
2
,
nuc_subst_mat
=
seq
.
alg
.
NUC44
,
nuc_gap_open
=
-
4
,
nuc_gap_ext
=
-
4
):
"""
Object to compute chain mappings
"""
Object to compute chain mappings
:param target: Target structure onto which models are mapped.
Computations happen on a selection only containing
...
...
@@ -29,7 +31,23 @@ class ChainMapper:
Needleman-Wunsch to compute pairwise
alignments. Relevant for :attr:`~chem_groups`
and related attributes.
:type resnum_alignments: :class:`bool`
:type resnum_alignments: :class:`bool`
:param pep_seqid_thr: Threshold used to decide when two chains are
identical. 95 percent tolerates the few mutations
crystallographers like to do.
:type pep_seqid_thr: :class:`float`
:param pep_gap_thr: Additional threshold to avoid gappy alignments with
high seqid. The reason for not just normalizing
seqid by the longer sequence is that one sequence
might be a perfect subsequence of the other but only
cover half of it. This threshold checks for a
maximum allowed fraction of gaps in any of the two
sequences after stripping terminal gaps.
:type pep_gap_thr: :class:`float`
:param nuc_seqid_thr: Nucleotide equivalent for *pep_seqid_thr*
:type nuc_seqid_thr: :class:`float`
:param nuc_gap_thr: Nucleotide equivalent for *nuc_gap_thr*
:type nuc_gap_thr: :class:`float`
:param pep_subst_mat: Substitution matrix to align peptide sequences,
irrelevant if *resnum_alignments* is True
:type pep_subst_mat: :class:`ost.seq.alg.SubstWeightMatrix`
...
...
@@ -39,14 +57,11 @@ class ChainMapper:
:param pep_gap_ext: Gap extension penalty to align peptide sequences,
irrelevant if *resnum_alignments* is True
:type pep_gap_ext: :class:`int`
:param nuc_subst_mat: Substitution matrix to align nucleotide sequences,
irrelevant if *resnum_alignments* is True
:param nuc_subst_mat: Nucleotide equivalent for *pep_subst_mat*
:type nuc_subst_mat: :class:`ost.seq.alg.SubstWeightMatrix`
:param nuc_gap_open: Gap open penalty to align nucleotide sequences,
irrelevant if *resnum_alignments* is True
:param nuc_gap_open: Nucleotide equivalent for *pep_gap_open*
:type nuc_gap_open: :class:`int`
:param nuc_gap_ext: Gap extension penalty to align nucleotide sequences,
irrelevant if *resnum_alignments* is True
:param nuc_gap_ext: Nucleotide equivalent for *pep_gap_ext*
:type nuc_gap_ext: :class:`int`
"""
...
...
@@ -63,6 +78,12 @@ class ChainMapper:
nuc_gap_open
=
nuc_gap_open
,
nuc_gap_ext
=
nuc_gap_ext
)
# attributes
self
.
pep_seqid_thr
=
pep_seqid_thr
self
.
pep_gap_thr
=
pep_gap_thr
self
.
nuc_seqid_thr
=
nuc_seqid_thr
self
.
nuc_gap_thr
=
nuc_gap_thr
# lazy computed attributes
self
.
_chem_groups
=
None
self
.
_chem_group_alignments
=
None
...
...
@@ -97,7 +118,12 @@ class ChainMapper:
if
self
.
_chem_group_alignments
is
None
:
self
.
_chem_group_alignments
,
self
.
_chem_group_types
=
\
_GetChemGroupAlignments
(
self
.
polypep_seqs
,
self
.
polynuc_seqs
,
self
.
aligner
)
self
.
aligner
,
pep_seqid_thr
=
self
.
pep_seqid_thr
,
pep_gap_thr
=
self
.
pep_gap_thr
,
nuc_seqid_thr
=
self
.
nuc_seqid_thr
,
nuc_gap_thr
=
self
.
nuc_gap_thr
)
return
self
.
_chem_group_alignments
@property
...
...
@@ -131,7 +157,12 @@ class ChainMapper:
if
self
.
_chem_group_types
is
None
:
self
.
_chem_group_alignments
,
self
.
_chem_group_types
=
\
_GetChemGroupAlignments
(
self
.
polypep_seqs
,
self
.
polynuc_seqs
,
self
.
aligner
)
self
.
aligner
,
pep_seqid_thr
=
self
.
pep_seqid_thr
,
pep_gap_thr
=
self
.
pep_gap_thr
,
nuc_seqid_thr
=
self
.
nuc_seqid_thr
,
nuc_gap_thr
=
self
.
nuc_gap_thr
)
return
self
.
_chem_group_types
def
GetChemMapping
(
self
,
model
):
...
...
@@ -160,7 +191,8 @@ class ChainMapper:
idx
,
aln
=
_MapSequence
(
self
.
chem_group_ref_seqs
,
self
.
chem_group_types
,
s
,
mol
.
ChemType
.
AMINOACIDS
,
95.
,
0.1
,
self
.
aligner
)
self
.
pep_seqid_thr
,
self
.
pep_gap_thr
,
self
.
aligner
)
if
idx
is
not
None
:
mapping
[
idx
].
append
(
s
.
GetName
())
alns
[
idx
].
append
(
aln
)
...
...
@@ -169,14 +201,14 @@ class ChainMapper:
idx
,
aln
=
_MapSequence
(
self
.
chem_group_ref_seqs
,
self
.
chem_group_types
,
s
,
mol
.
ChemType
.
NUCLEOTIDES
,
95.
,
0.1
,
self
.
aligner
)
self
.
nuc_seqid_thr
,
self
.
nuc_gap_thr
,
self
.
aligner
)
if
idx
is
not
None
:
mapping
[
idx
].
append
(
s
.
GetName
())
alns
[
idx
].
append
(
aln
)
return
(
mapping
,
alns
,
mdl
)
def
GetNaivelDDTMapping
(
self
,
model
,
bb_only
=
False
,
inclusion_radius
=
15.0
,
thresholds
=
[
0.5
,
1.0
,
2.0
,
4.0
]):
"""
Naively iterates all possible chain mappings and returns the best
...
...
@@ -289,7 +321,6 @@ class ChainMapper:
return
best_mapping
def
GetGreedylDDTMapping
(
self
,
model
,
inclusion_radius
=
15.0
,
thresholds
=
[
0.5
,
1.0
,
2.0
,
4.0
],
seed_strategy
=
"
fast
"
,
steep_opt_rate
=
None
,
...
...
@@ -391,7 +422,6 @@ class ChainMapper:
elif
seed_strategy
==
"
block
"
:
return
_BlockGreedy
(
the_greed
,
block_seed_size
,
block_n_mdl_chains
)
def
GetRigidMapping
(
self
,
model
,
single_chain_gdtts_thresh
=
0.4
,
subsampling
=
None
,
first_complete
=
False
):
"""
Identify chain mapping based on rigid superposition
...
...
@@ -520,7 +550,6 @@ class ChainMapper:
return
final_mapping
def
GetNMappings
(
self
,
model
):
"""
Returns number of possible mappings
...
...
@@ -747,27 +776,32 @@ def _GetAlnProps(aln):
gap_frac_2
=
float
(
n_gaps_2
)
/
len
(
aln
.
GetSequence
(
1
).
GetGaplessString
())
return
(
seqid
,
gap_frac_1
,
gap_frac_2
)
def
_GetChemGroupAlignments
(
pep_seqs
,
nuc_seqs
,
aligner
,
seqid_thr
=
95.
,
gap_thr
=
0.1
):
def
_GetChemGroupAlignments
(
pep_seqs
,
nuc_seqs
,
aligner
,
pep_seqid_thr
=
95.
,
pep_gap_thr
=
0.1
,
nuc_seqid_thr
=
95.
,
nuc_gap_thr
=
0.1
):
"""
Returns alignments with groups of chemically equivalent chains
:param pep_seqs: List of polypeptide sequences
:type pep_seqs: :class:`seq.SequenceList`
:param nuc_seqs: List of polynucleotide sequences
:type nuc_seqs: :class:`seq.SequenceList`
:param seqid_thr: Threshold used to decide when two chains are identical.
95 percent tolerates the few mutations crystallographers
like to do.
:type seqid_thr: :class:`float`
:param gap_thr: Additional threshold to avoid gappy alignments with high
seqid. The reason for not just normalizing seqid by the
longer sequence is that one sequence might be a perfect
subsequence of the other but only cover half of it. This
threshold checks for a maximum allowed fraction of gaps
in any of the two sequences after stripping terminal gaps.
:type gap_thr: :class:`float`
:param aligner: Helper class to generate pairwise alignments
:type aligner: :class:`_Aligner`
:param pep_seqid_thr: Threshold used to decide when two peptide chains are
identical. 95 percent tolerates the few mutations
crystallographers like to do.
:type pep_seqid_thr: :class:`float`
:param pep_gap_thr: Additional threshold to avoid gappy alignments with high
seqid. The reason for not just normalizing seqid by the
longer sequence is that one sequence might be a perfect
subsequence of the other but only cover half of it. This
threshold checks for a maximum allowed fraction of gaps
in any of the two sequences after stripping terminal gaps.
:type pep_gap_thr: :class:`float`
:param nuc_seqid_thr: Nucleotide equivalent of *pep_seqid_thr*
:type nuc_seqid_thr: :class:`float`
:param nuc_gap_thr: Nucleotide equivalent of *nuc_gap_thr*
:type nuc_gap_thr: :class:`float`
:returns: Tuple with first element being an AlignmentList. Each alignment
represents a group of chemically equivalent chains and the first
sequence is the longest. Second element is a list of equivalent
...
...
@@ -775,9 +809,9 @@ def _GetChemGroupAlignments(pep_seqs, nuc_seqs, aligner, seqid_thr=95.,
[:class:`ost.ChemType.AMINOACIDS`,
:class:`ost.ChemType.NUCLEOTIDES`]
"""
pep_groups
=
_GroupSequences
(
pep_seqs
,
seqid_thr
,
gap_thr
,
aligner
,
pep_groups
=
_GroupSequences
(
pep_seqs
,
pep_
seqid_thr
,
pep_
gap_thr
,
aligner
,
mol
.
ChemType
.
AMINOACIDS
)
nuc_groups
=
_GroupSequences
(
nuc_seqs
,
seqid_thr
,
gap_thr
,
aligner
,
nuc_groups
=
_GroupSequences
(
nuc_seqs
,
nuc_
seqid_thr
,
nuc_
gap_thr
,
aligner
,
mol
.
ChemType
.
NUCLEOTIDES
)
group_types
=
[
mol
.
ChemType
.
AMINOACIDS
]
*
len
(
pep_groups
)
group_types
+=
[
mol
.
ChemType
.
NUCLEOTIDES
]
*
len
(
nuc_groups
)
...
...
@@ -789,8 +823,6 @@ def _GroupSequences(seqs, seqid_thr, gap_thr, aligner, chem_type):
"""
Get list of alignments representing groups of equivalent sequences
:param seqid_thr: Threshold used to decide when two chains are identical.
95 percent tolerates the few mutations crystallographers
like to do.
:type seqid_thr: :class:`float`
:param gap_thr: Additional threshold to avoid gappy alignments with high
seqid. The reason for not just normalizing seqid by the
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment