Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
O
openstructure
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container registry
Model registry
Analyze
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
openstructure
Commits
37aab3a0
Commit
37aab3a0
authored
2 years ago
by
Studer Gabriel
Browse files
Options
Downloads
Patches
Plain Diff
add stereochemistry Python module
parent
b1a69bdf
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
modules/mol/alg/pymod/CMakeLists.txt
+1
-0
1 addition, 0 deletions
modules/mol/alg/pymod/CMakeLists.txt
modules/mol/alg/pymod/stereochemistry.py
+362
-0
362 additions, 0 deletions
modules/mol/alg/pymod/stereochemistry.py
with
363 additions
and
0 deletions
modules/mol/alg/pymod/CMakeLists.txt
+
1
−
0
View file @
37aab3a0
...
@@ -26,6 +26,7 @@ set(OST_MOL_ALG_PYMOD_MODULES
...
@@ -26,6 +26,7 @@ set(OST_MOL_ALG_PYMOD_MODULES
qsscore.py
qsscore.py
scoring.py
scoring.py
chain_mapping.py
chain_mapping.py
stereochemistry.py
)
)
if
(
NOT ENABLE_STATIC
)
if
(
NOT ENABLE_STATIC
)
...
...
This diff is collapsed.
Click to expand it.
modules/mol/alg/pymod/stereochemistry.py
0 → 100644
+
362
−
0
View file @
37aab3a0
import
os
import
json
import
numpy
as
np
from
ost
import
geom
from
ost
import
mol
def
_PotentialDisulfid
(
a_one
,
a_two
):
"""
Returns whether two atoms can potentially build a disulfid bond
Assumes that they
'
re from two distinct residues
"""
if
a_one
.
GetName
()
==
"
SG
"
and
a_two
.
GetName
()
==
"
SG
"
:
if
a_one
.
GetResidue
().
GetName
()
==
"
CYS
"
:
if
a_two
.
GetResidue
().
GetName
()
==
"
CYS
"
:
return
True
return
False
def
_GetAngles
(
bonds
):
"""
Returns list of angles based on bonds
Returns list of tuples, each tuple has three atom handles
representing angles
"""
angles
=
list
()
done
=
set
()
for
bond
in
bonds
:
h1
=
bond
.
first
.
GetHashCode
()
h2
=
bond
.
second
.
GetHashCode
()
for
a
in
bond
.
first
.
GetBondPartners
():
h0
=
a
.
GetHashCode
()
if
h0
!=
h2
:
if
((
h0
,
h1
,
h2
))
not
in
done
and
(
h2
,
h1
,
h0
)
not
in
done
:
angles
.
append
((
a
,
bond
.
first
,
bond
.
second
))
done
.
add
((
h0
,
h1
,
h2
))
for
a
in
bond
.
second
.
GetBondPartners
():
h3
=
a
.
GetHashCode
()
if
h3
!=
h1
:
if
((
h1
,
h2
,
h3
))
not
in
done
and
(
h3
,
h2
,
h1
)
not
in
done
:
angles
.
append
((
bond
.
first
,
bond
.
second
,
a
))
done
.
add
((
h1
,
h2
,
h3
))
return
angles
def
_ParseBondData
(
doc
):
"""
Parse stereochemistry data for bonds
That is expected distances and standard deviations from a
:class:`gemmi.Document`. Concatenates results form all loops with tags:
_chem_comp_bond.comp_id, _chem_comp_bond.atom_id_1,
_chem_comp_bond.atom_id_2, _chem_comp_bond.value_dist,
_chem_comp_bond.value_dist_esd
:param doc: Gemmi doc representing cif file opened with
gemmi.cif.read_file(filepath)
:type doc: :class:`gemmi.Document`
:returns: :class:`dict` with one key per compound, the respective value
is again a dict with key f
"
{at_1}_{at_2}
"
and value
[dist, dist_std].
"""
data
=
dict
()
for
block
in
doc
:
comp_id
=
block
.
find_values
(
"
_chem_comp_bond.comp_id
"
)
at_1
=
block
.
find_values
(
"
_chem_comp_bond.atom_id_1
"
)
at_2
=
block
.
find_values
(
"
_chem_comp_bond.atom_id_2
"
)
dist
=
block
.
find_values
(
"
_chem_comp_bond.value_dist
"
)
dist_std
=
block
.
find_values
(
"
_chem_comp_bond.value_dist_esd
"
)
if
None
not
in
[
comp_id
,
at_1
,
at_2
,
dist
,
dist_std
]:
for
a
,
b
,
c
,
d
,
e
in
zip
(
comp_id
,
at_1
,
at_2
,
dist
,
dist_std
):
if
a
not
in
data
:
data
[
a
]
=
dict
()
data
[
a
][
f
"
{
b
}
_
{
c
}
"
]
=
[
float
(
d
),
float
(
e
)]
return
data
def
_ParseAngleData
(
doc
):
"""
Parse stereochemistry data for angles
That is expected distances and standard deviations from a
:class:`gemmi.Document`. Concatenates results form all loops with tags:
_chem_comp_angle.comp_id, _chem_comp_angle.atom_id_1,
_chem_comp_angle.atom_id_2, _chem_comp_angle.atom_id_2,
_chem_comp_angle.value_angle, _chem_comp_angle.value_angle_esd
:param doc: Gemmi doc representing cif file opened with
gemmi.cif.read_file(filepath)
:type doc: :class:`gemmi.Document`
:returns: :class:`dict` with one key per compound, the respective value
is again a dict with key f
"
{at_1}_{at_2}_{at_3}
"
and value
[angle, angle_std].
"""
data
=
dict
()
for
block
in
doc
:
comp_id
=
block
.
find_values
(
"
_chem_comp_angle.comp_id
"
)
at_1
=
block
.
find_values
(
"
_chem_comp_angle.atom_id_1
"
)
at_2
=
block
.
find_values
(
"
_chem_comp_angle.atom_id_2
"
)
at_3
=
block
.
find_values
(
"
_chem_comp_angle.atom_id_3
"
)
angle
=
block
.
find_values
(
"
_chem_comp_angle.value_angle
"
)
angle_std
=
block
.
find_values
(
"
_chem_comp_angle.value_angle_esd
"
)
if
None
not
in
[
comp_id
,
at_1
,
at_2
,
at_3
,
angle
,
angle_std
]:
for
a
,
b
,
c
,
d
,
e
,
f
in
zip
(
comp_id
,
at_1
,
at_2
,
at_3
,
angle
,
angle_std
):
if
a
not
in
data
:
data
[
a
]
=
dict
()
data
[
a
][
f
"
{
b
}
_
{
c
}
_
{
d
}
"
]
=
[
float
(
e
),
float
(
f
)]
return
data
def
StereoDataFromMON_LIB
(
mon_lib_path
,
compounds
=
None
):
"""
Parses stereochemistry parameters from CCP4 MON_LIB
CCP4 `MON_LIB <https://www.ccp4.ac.uk/html/mon_lib.html>`_ contains
data on ideal bond lengths/angles for compounds.
Original data (several updates in the meantime) come from:
* Amino acid bond lengths and angles: Engh and Huber, Acta Cryst.
A47, 392-400 (1991).
* Purine and pyrimidine bond lengths and angles: O. Kennard & R. Taylor
(1982), J. Am. Soc. Chem. vol. 104, pp. 3209-3212.
* Sugar-phosphate backbone bond lengths and bond angles: W. Saenger’s
Principles of Nucleic Acid Structure (1983), Springer-Verlag, pp. 70,86.
This function adds a dependency to the
`gemmi <https://github.com/project-gemmi/gemmi/>`_ library to read cif
files.
:param mon_lib_path: Path to CCP4 MON_LIB
:type mon_lib_path: :class:`str`
:param compounds: Compounds to parse - parses proteinogenic amino acids
and nucleotides if not given.
:type compounds: :class:`list`
:returns: :class:`dict` with stereochemistry parameters
"""
if
compounds
is
None
:
compounds
=
[
'
ALA
'
,
'
ARG
'
,
'
ASN
'
,
'
ASP
'
,
'
CYS
'
,
'
GLN
'
,
'
GLU
'
,
'
GLY
'
,
'
HIS
'
,
'
ILE
'
,
'
LEU
'
,
'
LYS
'
,
'
MET
'
,
'
MSE
'
,
'
PHE
'
,
'
PRO
'
,
'
SER
'
,
'
THR
'
,
'
TRP
'
,
'
TYR
'
,
'
VAL
'
,
'
DA
'
,
'
A
'
,
'
DC
'
,
'
C
'
,
'
DG
'
,
'
G
'
,
'
DU
'
,
'
U
'
,
'
DT
'
,
'
DI
'
,
'
I
'
]
cif_paths
=
list
()
for
c
in
compounds
:
p
=
os
.
path
.
join
(
mon_lib_path
,
c
[
0
].
lower
(),
c
+
"
.cif
"
)
if
not
os
.
path
.
exists
(
p
):
raise
RuntimeError
(
f
"
Tried to find cif file for compound
{
c
}
"
f
"
in specified MON_LIB (
{
mon_lib_path
}
).
"
f
"
Expected file (
{
p
}
) does not exist.
"
)
cif_paths
.
append
(
p
)
# hide import to avoid it as dependency for the whole module
from
gemmi
import
cif
# construct return dict from first element and subsequently
# add the remainder
doc
=
cif
.
read_file
(
cif_paths
[
0
])
data
=
{
"
bond_data
"
:
_ParseBondData
(
doc
),
"
angle_data
"
:
_ParseAngleData
(
doc
)}
for
cp
in
cif_paths
[
1
:]:
doc
=
cif
.
read_file
(
cp
)
bond_data
=
_ParseBondData
(
doc
)
angle_data
=
_ParseAngleData
(
doc
)
data
[
"
bond_data
"
].
update
(
bond_data
)
data
[
"
angle_data
"
].
update
(
angle_data
)
return
data
def
GetBondParam
(
a1
,
a2
,
stereo_data
):
"""
Returns mean and standard deviation for bond
:param a1: First atom that defines bond
:type a1: :class:`mol.AtomView`/:class:`mol.AtomHandle`
:param a2: Second atom that defines bond
:type a2: :class:`mol.AtomView`/:class:`mol.AtomHandle`
:param stereo_data: Stereochemistry data
:type stereo_data: :class:`dict`
:returns: :class:`tuple` with mean and standard deviation. Values are None
if respective bond is not found in *stereo_data*
"""
if
a1
.
GetResidue
().
GetHashCode
()
==
a2
.
GetResidue
().
GetHashCode
():
# intra residue case, inter-residue case not yet implemented
rname
=
a1
.
GetResidue
().
GetName
()
a1name
=
a1
.
GetName
()
a2name
=
a2
.
GetName
()
if
rname
in
stereo_data
[
"
bond_data
"
]:
key
=
a1name
+
"
_
"
+
a2name
if
key
in
stereo_data
[
"
bond_data
"
][
rname
]:
mean
=
stereo_data
[
"
bond_data
"
][
rname
][
key
][
0
]
std
=
stereo_data
[
"
bond_data
"
][
rname
][
key
][
1
]
return
(
mean
,
std
)
key
=
a2name
+
"
_
"
+
a1name
if
key
in
stereo_data
[
"
bond_data
"
][
rname
]:
mean
=
stereo_data
[
"
bond_data
"
][
rname
][
key
][
0
]
std
=
stereo_data
[
"
bond_data
"
][
rname
][
key
][
1
]
return
(
mean
,
std
)
return
(
None
,
None
)
def
GetAngleParam
(
a1
,
a2
,
a3
,
stereo_data
):
"""
Returns mean and standard deviation for angle
:param a1: First atom that defines angle
:type a1: :class:`mol.AtomView`/:class:`mol.AtomHandle`
:param a2: Second atom that defines angle
:type a2: :class:`mol.AtomView`/:class:`mol.AtomHandle`
:param a3: Second atom that defines angle
:type a3: :class:`mol.AtomView`/:class:`mol.AtomHandle`
:param stereo_data: Stereochemistry data
:type stereo_data: :class:`dict`
:returns: :class:`tuple` with mean and standard deviation. Values are None
if respective angle is not found in *stereo_data*
"""
h1
=
a1
.
GetResidue
().
handle
.
GetHashCode
()
h2
=
a2
.
GetResidue
().
handle
.
GetHashCode
()
h3
=
a3
.
GetResidue
().
handle
.
GetHashCode
()
if
h1
==
h2
and
h2
==
h3
:
# intra residue case, inter-residue case not yet implemented
rname
=
a1
.
GetResidue
().
GetName
()
a1name
=
a1
.
GetName
()
a2name
=
a2
.
GetName
()
a3name
=
a3
.
GetName
()
if
rname
in
stereo_data
[
"
angle_data
"
]:
key
=
a1name
+
"
_
"
+
a2name
+
"
_
"
+
a3name
if
key
in
stereo_data
[
"
angle_data
"
][
rname
]:
mean
=
stereo_data
[
"
angle_data
"
][
rname
][
key
][
0
]
std
=
stereo_data
[
"
angle_data
"
][
rname
][
key
][
1
]
return
(
mean
,
std
)
key
=
a3name
+
"
_
"
+
a2name
+
"
_
"
+
a1name
if
key
in
stereo_data
[
"
angle_data
"
][
rname
]:
mean
=
stereo_data
[
"
angle_data
"
][
rname
][
key
][
0
]
std
=
stereo_data
[
"
angle_data
"
][
rname
][
key
][
1
]
return
(
mean
,
std
)
return
(
None
,
None
)
def
GetClashes
(
ent
,
vdw_radii
=
None
,
tolerance
=
1.5
,
disulfid_dist
=
2.03
,
disulfid_tolerance
=
1.0
):
"""
Identifies clashing atoms
A clash between two non-bonded atoms is defined as their distance d being
below the sum of their vdw radii with some subtracted tolerance value.
The default values are not very sensitive.
:param ent: Entity for which you want to identify clashing atoms
:type ent: :class:`EntityHandle`/:class:`EntityView`
:param vdw_radii: Element based van der Waals radii. Only atoms of these
elements will be considered. If not given, default values
for all elements occuring in proteins/nucleotides are
used. Must be provided as :class:`dict`, where they key
are elements (capitalized) and value the respective radii
in Angstrom.
:type vdw_radii: :class:`dict`
:param tolerance: Tolerance value
:param disulfid_dist: Summed vdw radius that is used if two Sulfurs that can
potentially build a disulfid bond interact
:type disulfid_dist: :class:`float`
:param disulfid_tolerance: The respective tolerance
:type disulfid_dist: :class:`float`
:returns: A :class:`list` of pairs. Each pair consists of two
:class:`mol.AtomView` from *ent* that are clashing.
"""
if
vdw_radii
is
None
:
vdw_radii
=
{
"
C
"
:
1.70
,
"
N
"
:
1.55
,
"
O
"
:
1.52
,
"
P
"
:
1.80
,
"
S
"
:
1.80
}
for
ele
in
vdw_radii
.
keys
():
if
ele
.
upper
()
!=
ele
:
raise
RuntimeError
(
f
"
Elements in vdw_radii must be upper case.
"
f
"
Got
{
ele
}
"
)
# it would be elegant to just do a selection by the ele property. However,
# thats case sensitive so someone could define a vdw radius for Cl but
# the element of the atom is CL.
elements
=
set
([
ele
.
upper
()
for
ele
in
vdw_radii
.
keys
()])
for
a
in
ent
.
atoms
:
if
a
.
GetElement
().
upper
()
in
elements
:
a
.
SetIntProp
(
"
clash_check
"
,
1
)
clash_ent
=
ent
.
Select
(
"
gaclash_check:0=1
"
)
max_radius
=
max
(
vdw_radii
.
values
())
max_radius
=
max
(
max_radius
,
disulfid_dist
)
min_tolerance
=
min
(
tolerance
,
disulfid_tolerance
)
radius
=
2
*
max_radius
-
min_tolerance
done
=
set
()
return_list
=
list
()
for
a
in
clash_ent
.
atoms
:
a_hash
=
a
.
handle
.
GetHashCode
()
close_atoms
=
clash_ent
.
FindWithin
(
a
.
GetPos
(),
radius
)
for
ca
in
close_atoms
:
ca_hash
=
ca
.
handle
.
GetHashCode
()
if
a_hash
!=
ca_hash
and
not
mol
.
BondExists
(
a
.
handle
,
ca
.
handle
):
d
=
geom
.
Distance
(
a
.
GetPos
(),
ca
.
GetPos
())
if
_PotentialDisulfid
(
a
,
ca
):
thresh
=
disulfid_dist
-
disulfid_tolerance
else
:
thresh
=
vdw_radii
[
a
.
GetElement
().
upper
()]
thresh
+=
vdw_radii
[
ca
.
GetElement
().
upper
()]
thresh
-=
tolerance
if
d
<
thresh
:
# check if already there, add if not
hash_pair
=
(
min
(
a_hash
,
ca_hash
),
max
(
a_hash
,
ca_hash
))
if
hash_pair
not
in
done
:
done
.
add
(
hash_pair
)
return_list
.
append
((
a
,
ca
))
return
return_list
def
GetBadBonds
(
ent
,
stereo_data
=
None
,
tolerance
=
12
):
"""
Identify unrealistic bonds
:param ent: Entity for which you want to identify unrealistic bonds
:type ent: :class:`mol.EntityHandle`/:class:`mol.EntityView`
:param stereo_data: Stereochemistry data
:type stereo_data: :class:`dict`
:param tolerance: Bonds that devaiate more than *tolerance* times standard
deviation from expected mean are considered bad
:type tolerance: :class:`int`
:returns: :class:`list` of pairs. Each pair consists of two
:class:`mol.AtomHandle` from *ent* that represent bad bonds.
"""
assert
(
"
bond_data
"
in
stereo_data
)
return_list
=
list
()
for
b
in
ent
.
bonds
:
a1
=
b
.
first
a2
=
b
.
second
mean
,
std
=
GetBondParam
(
a1
,
a2
,
stereo_data
)
if
None
not
in
[
mean
,
std
]:
diff
=
abs
(
mean
-
b
.
length
)
if
diff
>
tolerance
*
std
:
return_list
.
append
((
a1
,
a2
))
return
return_list
def
GetBadAngles
(
ent
,
stereo_data
=
None
,
tolerance
=
12
):
"""
Identify unrealistic angles
:param ent: Entity for which you want to identify unrealistic angles
:type ent: :class:`mol.EntityHandle`/:class:`mol.EntityView`
:param stereo_data: Stereochemistry data
:type stereo_data: :class:`dict`
:param tolerance: Angles that devaiate more than *tolerance* times standard
deviation from expected mean are considered bad
:type tolerance: :class:`int`
:returns: :class:`list` of tuples. Each tuple consists of three
:class:`mol.AtomHandle` from *ent* that represent bad angles.
"""
assert
(
"
angle_data
"
in
stereo_data
)
return_list
=
list
()
for
a
in
_GetAngles
(
ent
.
bonds
):
mean
,
std
=
GetAngleParam
(
a
[
0
],
a
[
1
],
a
[
2
],
stereo_data
)
if
None
not
in
[
mean
,
std
]:
angle
=
geom
.
Angle
(
a
[
0
].
GetPos
()
-
a
[
1
].
GetPos
(),
a
[
2
].
GetPos
()
-
a
[
1
].
GetPos
())
angle
=
angle
/
np
.
pi
*
180
# stereo params are in degrees
diff
=
abs
(
mean
-
angle
)
if
diff
>
tolerance
*
std
:
return_list
.
append
(
a
)
return
return_list
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment