Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
O
openstructure
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container Registry
Model registry
Analyze
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
openstructure
Commits
9295ec07
Verified
Commit
9295ec07
authored
1 year ago
by
Xavier Robin
Browse files
Options
Downloads
Patches
Plain Diff
fix: SCHWED-6059 revisit coverage and avoid threshold effect
parent
5ee8a49f
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
modules/mol/alg/pymod/ligand_scoring.py
+37
-29
37 additions, 29 deletions
modules/mol/alg/pymod/ligand_scoring.py
modules/mol/alg/tests/test_ligand_scoring.py
+0
-8
0 additions, 8 deletions
modules/mol/alg/tests/test_ligand_scoring.py
with
37 additions
and
37 deletions
modules/mol/alg/pymod/ligand_scoring.py
+
37
−
29
View file @
9295ec07
...
...
@@ -72,15 +72,13 @@ class LigandScorer:
`substructure_match=True`, complete model ligands can be scored against
partial target ligands. One problem with this approach is that it is
very easy to find good matches to small, irrelevant ligands like EDO, CO2
or GOL. To counter that, the assignment algorithm first assigns full
matches (and ignores all partial matches, even if they would result in a
better score). Once all full matches are assigned, it assigns the remaining
matches by decreasing coverage (fraction of atoms of the model ligand atoms
covered in the target) with a window of `coverage_delta` (by default 0.05),
starting from the highest coverage in the ligands which are still to be
assigned. As a result, for instance, with a delta of 0.05, a low-score
match with coverage 0.96 would be preferred to a high-score match with
coverage 0.90.
or GOL. To counter that, the assignment algorithm considers the coverage,
expressed as the fraction of atoms of the model ligand atoms covered in the
target. Higher coverage matches are prioritized, but a match with a better
score will be preferred if it falls within a window of `coverage_delta`
(by default 0.05) of a worse-scoring match. As a result, for instance,
with a delta of 0.05, a low-score match with coverage 0.96 would be
preferred to a high-score match with coverage 0.90.
Assumptions:
...
...
@@ -814,27 +812,39 @@ class LigandScorer:
LogDebug
(
"
No model or target ligand, returning no assignment.
"
)
return
assignments
# First only consider full coverage matches.
def
_get_best_match
(
mat1_val
,
coverage_val
):
"""
Extract the row/column indices of the prediction matching the
given values.
"""
mat1_match_idx
=
np
.
argwhere
((
mat1
==
mat1_val
)
&
(
coverage
>=
coverage_val
))
# Multiple "best" - use mat2 to disambiguate
if
len
(
mat1_match_idx
)
>
1
:
# Get the values of mat2 at these positions
best_mat2_match
=
[
mat2
[
tuple
(
x
)]
for
x
in
mat1_match_idx
]
# Find the index of the best mat2
# Note: argmin returns the first value which is min.
best_mat2_idx
=
np
.
array
(
best_mat2_match
).
argmin
()
# Now get the original indices
return
mat1_match_idx
[
best_mat2_idx
]
else
:
return
mat1_match_idx
[
0
]
# First only consider top coverage matches.
min_coverage
=
np
.
max
(
coverage
)
# If there's no full coverage, allow a small delta
if
min_coverage
<
1
:
min_coverage
=
min_coverage
-
coverage_delta
while
min_coverage
>
0
-
coverage_delta
:
LogInfo
(
"
Looking for matches with coverage >= %s
"
%
min_coverage
)
while
min_coverage
>
0
:
LogVerbose
(
"
Looking for matches with coverage >= %s
"
%
min_coverage
)
min_mat1
=
LigandScorer
.
_nanmin_nowarn
(
mat1
,
coverage
<
min_coverage
)
while
not
np
.
isnan
(
min_mat1
):
best_mat1
=
np
.
argwhere
((
mat1
==
min_mat1
)
&
(
coverage
>=
min_coverage
))
# Multiple "best" - use mat2 to disambiguate
if
len
(
best_mat1
)
>
1
:
# Get the values of mat2 at these positions
best_mat2_match
=
[
mat2
[
tuple
(
x
)]
for
x
in
best_mat1
]
# Find the index of the best mat2
# Note: argmin returns the first value which is min.
best_mat2_idx
=
np
.
array
(
best_mat2_match
).
argmin
()
# Now get the original indices
max_i_trg
,
max_i_mdl
=
best_mat1
[
best_mat2_idx
]
else
:
max_i_trg
,
max_i_mdl
=
best_mat1
[
0
]
max_i_trg
,
max_i_mdl
=
_get_best_match
(
min_mat1
,
min_coverage
)
# Would we have a match for this model ligand with higher score
# but lower coverage?
alternative_matches
=
(
mat1
[:,
max_i_mdl
]
<
min_mat1
)
&
(
coverage
[:,
max_i_mdl
]
>
(
min_coverage
-
coverage_delta
))
if
np
.
any
(
alternative_matches
):
# Get the scores of these matches
LogVerbose
(
"
Found match with lower coverage but better score
"
)
min_mat1
=
np
.
min
(
mat1
[
alternative_matches
])
max_i_trg
,
max_i_mdl
=
_get_best_match
(
min_mat1
,
min_coverage
-
coverage_delta
)
# Disable row and column
mat1
[
max_i_trg
,
:]
=
np
.
nan
...
...
@@ -851,8 +861,6 @@ class LigandScorer:
min_mat1
=
LigandScorer
.
_nanmin_nowarn
(
mat1
,
coverage
<
min_coverage
)
# Recompute min_coverage
min_coverage
=
np
.
max
(
coverage
)
if
min_coverage
<
1
:
min_coverage
=
min_coverage
-
coverage_delta
return
assignments
@staticmethod
...
...
This diff is collapsed.
Click to expand it.
modules/mol/alg/tests/test_ligand_scoring.py
+
0
−
8
View file @
9295ec07
...
...
@@ -654,14 +654,6 @@ class TestLigandScoring(unittest.TestCase):
self
.
assertEqual
(
sc
.
rmsd_details
[
'
00001_
'
][
1
][
"
target_ligand
"
].
name
,
"
OLA
"
)
self
.
assertAlmostEqual
(
sc
.
rmsd
[
'
00001_
'
][
1
],
6.13006878
,
4
)
# Finally, we check that we still assign to the full match even with
# a large coverage delta
sc
=
LigandScorer
(
mdl
,
trg
,
model_ligands
=
[
mdl_lig
],
substructure_match
=
True
,
coverage_delta
=
0.5
)
self
.
assertEqual
(
sc
.
rmsd_details
[
'
00001_
'
][
1
][
"
coverage
"
],
1.0
)
self
.
assertEqual
(
sc
.
rmsd_details
[
'
00001_
'
][
1
][
"
target_ligand
"
].
name
,
"
RET
"
)
self
.
assertAlmostEqual
(
sc
.
rmsd
[
'
00001_
'
][
1
],
15.56022
,
4
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment