Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
O
openstructure
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container registry
Model registry
Analyze
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
openstructure
Commits
81738ccb
Commit
81738ccb
authored
Aug 4, 2022
by
Studer Gabriel
Browse files
Options
Downloads
Patches
Plain Diff
lDDT: introduce minimal chain lengths in chain mapping
parent
2f03a9fa
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
modules/mol/alg/pymod/chain_mapping.py
+33
-38
33 additions, 38 deletions
modules/mol/alg/pymod/chain_mapping.py
with
33 additions
and
38 deletions
modules/mol/alg/pymod/chain_mapping.py
+
33
−
38
View file @
81738ccb
...
...
@@ -87,17 +87,24 @@ class ChainMapper:
:type nuc_gap_open: :class:`int`
:param nuc_gap_ext: Nucleotide equivalent for *pep_gap_ext*
:type nuc_gap_ext: :class:`int`
:param min_pep_length: Minimal number of residues for a peptide chain to be
considered in target and in models.
:type min_pep_length: :class:`int`
:param min_nuc_length: Minimal number of residues for a nucleotide chain to be
considered in target and in models.
:type min_nuc_length: :class:`int`
"""
def
__init__
(
self
,
target
,
resnum_alignments
=
False
,
pep_seqid_thr
=
95.
,
pep_gap_thr
=
0.1
,
nuc_seqid_thr
=
95.
,
nuc_gap_thr
=
0.1
,
pep_subst_mat
=
seq
.
alg
.
BLOSUM100
,
pep_gap_open
=
-
5
,
pep_gap_ext
=
-
2
,
nuc_subst_mat
=
seq
.
alg
.
NUC44
,
nuc_gap_open
=
-
4
,
nuc_gap_ext
=
-
4
):
nuc_gap_open
=
-
4
,
nuc_gap_ext
=
-
4
,
min_pep_length
=
10
,
min_nuc_length
=
4
):
# target structure preprocessing
self
.
_target
,
self
.
_polypep_seqs
,
self
.
_polynuc_seqs
=
\
_ProcessStructure
(
target
)
_ProcessStructure
(
target
,
min_pep_length
,
min_nuc_length
)
# helper class to generate pairwise alignments
self
.
aligner
=
_Aligner
(
resnum_aln
=
resnum_alignments
,
...
...
@@ -113,6 +120,8 @@ class ChainMapper:
self
.
pep_gap_thr
=
pep_gap_thr
self
.
nuc_seqid_thr
=
nuc_seqid_thr
self
.
nuc_gap_thr
=
nuc_gap_thr
self
.
min_pep_length
=
min_pep_length
self
.
min_nuc_length
=
min_nuc_length
# lazy computed attributes
self
.
_chem_groups
=
None
...
...
@@ -249,7 +258,9 @@ class ChainMapper:
polynucleotides whose ATOMSEQ exactly matches the sequence
info in the returned alignments.
"""
mdl
,
mdl_pep_seqs
,
mdl_nuc_seqs
=
_ProcessStructure
(
model
)
mdl
,
mdl_pep_seqs
,
mdl_nuc_seqs
=
_ProcessStructure
(
model
,
self
.
min_pep_length
,
self
.
min_nuc_length
)
mapping
=
[
list
()
for
x
in
self
.
chem_groups
]
alns
=
[
seq
.
AlignmentList
()
for
x
in
self
.
chem_groups
]
...
...
@@ -645,7 +656,7 @@ class ChainMapper:
# INTERNAL HELPERS
##################
def
_ProcessStructure
(
ent
):
def
_ProcessStructure
(
ent
,
min_pep_length
,
min_nuc_length
):
"""
Entity processing for chain mapping
* Selects view containing peptide and nucleotide residues
...
...
@@ -653,6 +664,7 @@ def _ProcessStructure(ent):
* Attaches corresponding :class:`ost.mol.EntityView` to each sequence
* Ensures strictly increasing residue numbers without insertion codes
in each chain
* filters chains by length
:param ent: Entity to process
:type ent: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
...
...
@@ -662,47 +674,16 @@ def _ProcessStructure(ent):
returned view, sequences have :class:`ost.mol.EntityView` of
according chains attached 3) same for polynucleotide chains
"""
view
=
_StructureSelection
(
ent
)
polypep_seqs
,
polynuc_seqs
=
_GetAtomSeqs
(
view
)
return
(
view
,
polypep_seqs
,
polynuc_seqs
)
def
_StructureSelection
(
ent
):
"""
Helper for _ProcessStructure
Selects structure to only contain peptide and nucleotide residues
Additionally selects for residues that also contain the backbone
representatives (CA for peptide and C3
'
for nucleotides) to avoid ATOMSEQ
alignment inconsistencies when switching between all atom and backbone only
representations.
"""
query
=
"
peptide=true or nucleotide=true and aname=
\"
CA
\"
,
\"
C3
'
\"
"
sel
=
ent
.
Select
(
query
)
view
=
ent
.
CreateEmptyView
()
for
r
in
sel
.
residues
:
view
.
AddResidue
(
r
.
handle
,
mol
.
INCLUDE_ALL
)
return
view
def
_GetAtomSeqs
(
ent
):
"""
Helper for _ProcessStructure
Extracts and returns atomseqs for polypeptide/nucleotide chains with
attached :class:`ost.mol.EntityView`
:param ent: Entity for which you want to extract atomseqs
:type ent: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
:returns: Two lists, first containing atomseqs for all polypeptide chains,
the second is the same for polynucleotides
:raises: :class:`RuntimeError` if ent contains any residue which evaluates
False for `r.IsPeptideLinking() or r.IsNucleotideLinking()`,
these two types are not strictly separated in different chains,
residue numbers in a chain are not strictly increasing or contain
insertion codes.
"""
polypep_seqs
=
seq
.
CreateSequenceList
()
polynuc_seqs
=
seq
.
CreateSequenceList
()
for
ch
in
ent
.
chains
:
for
ch
in
view
.
chains
:
n_res
=
len
(
ch
.
residues
)
n_pep
=
sum
([
r
.
IsPeptideLinking
()
for
r
in
ch
.
residues
])
n_nuc
=
sum
([
r
.
IsNucleotideLinking
()
for
r
in
ch
.
residues
])
...
...
@@ -716,6 +697,13 @@ def _GetAtomSeqs(ent):
raise
RuntimeError
(
"
All residues must either be peptide_linking
"
"
or nucleotide_linking
"
)
# filter out short chains
if
n_pep
>
0
and
n_pep
<
min_pep_length
:
continue
if
n_nuc
>
0
and
n_nuc
<
min_nuc_length
:
continue
# check if no insertion codes are present in residue numbers
ins_codes
=
[
r
.
GetNumber
().
GetInsCode
()
for
r
in
ch
.
residues
]
if
len
(
set
(
ins_codes
))
!=
1
or
ins_codes
[
0
]
!=
'
\0
'
:
...
...
@@ -730,7 +718,7 @@ def _GetAtomSeqs(ent):
s
=
''
.
join
([
r
.
one_letter_code
for
r
in
ch
.
residues
])
s
=
seq
.
CreateSequence
(
ch
.
GetName
(),
s
)
s
.
AttachView
(
ent
.
Select
(
f
"
cname=
{
ch
.
GetName
()
}
"
))
s
.
AttachView
(
view
.
Select
(
f
"
cname=
{
ch
.
GetName
()
}
"
))
if
n_pep
==
n_res
:
polypep_seqs
.
AddSequence
(
s
)
elif
n_nuc
==
n_res
:
...
...
@@ -738,7 +726,13 @@ def _GetAtomSeqs(ent):
else
:
raise
RuntimeError
(
"
This shouldnt happen
"
)
return
(
polypep_seqs
,
polynuc_seqs
)
# select for chains for which we actually extracted the sequence
chain_names
=
[
s
.
GetAttachedView
().
chains
[
0
].
name
for
s
in
polypep_seqs
]
chain_names
+=
[
s
.
GetAttachedView
().
chains
[
0
].
name
for
s
in
polynuc_seqs
]
query
=
f
"
cname=
{
'
,
'
.
join
(
chain_names
)
}
"
view
=
view
.
Select
(
query
)
return
(
view
,
polypep_seqs
,
polynuc_seqs
)
class
_Aligner
:
def
__init__
(
self
,
pep_subst_mat
=
seq
.
alg
.
BLOSUM100
,
pep_gap_open
=
-
5
,
...
...
@@ -842,6 +836,7 @@ def _GetAlnPropsTwo(aln):
return
(
seq
.
alg
.
SequenceIdentity
(
aln
),
float
(
n_aligned
)
/
n_tot
)
def
_GetAlnPropsOne
(
aln
):
"""
Returns basic properties of *aln* version one...
:param aln: Alignment to compute properties
...
...
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
sign in
to comment