Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
O
openstructure
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container registry
Model registry
Analyze
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
openstructure
Commits
f70681e4
Commit
f70681e4
authored
3 years ago
by
Studer Gabriel
Browse files
Options
Downloads
Patches
Plain Diff
lDDT: add lDDT based chain mapping functionality - work in progress
parent
e7a610b0
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
modules/mol/alg/pymod/CMakeLists.txt
+1
-0
1 addition, 0 deletions
modules/mol/alg/pymod/CMakeLists.txt
modules/mol/alg/pymod/__init__.py
+1
-0
1 addition, 0 deletions
modules/mol/alg/pymod/__init__.py
modules/mol/alg/pymod/chain_mapping.py
+531
-0
531 additions, 0 deletions
modules/mol/alg/pymod/chain_mapping.py
with
533 additions
and
0 deletions
modules/mol/alg/pymod/CMakeLists.txt
+
1
−
0
View file @
f70681e4
...
...
@@ -24,6 +24,7 @@ set(OST_MOL_ALG_PYMOD_MODULES
hbond.py
lddt.py
scoring.py
chain_mapping.py
)
if
(
NOT ENABLE_STATIC
)
...
...
This diff is collapsed.
Click to expand it.
modules/mol/alg/pymod/__init__.py
+
1
−
0
View file @
f70681e4
...
...
@@ -7,6 +7,7 @@ import ost.mol.alg.helix_kinks
import
ost.mol.alg.hbond
import
ost.mol.alg.lddt
import
ost.mol.alg.scoring
import
ost.mol.alg.chain_mapping
# Fills a list of reference clashing distances from a file (requires a path to the file)
def
FillClashingDistancesFromFile
(
filename
):
...
...
This diff is collapsed.
Click to expand it.
modules/mol/alg/pymod/chain_mapping.py
0 → 100644
+
531
−
0
View file @
f70681e4
import
itertools
from
ost
import
seq
from
ost
import
mol
from
ost.mol.alg
import
lddt
class
ChainMapper
:
def
__init__
(
self
,
target
):
"""
Object to compute chain mappings
:param target: Target structure onto which models are mapped.
Computations happen on a selection only containing
polypeptides and polynucleotides.
:type target: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
"""
self
.
target
=
target
.
Select
(
"
peptide=true or nucleotide=true
"
)
# lazy computed attributes
self
.
_chem_groups
=
None
self
.
_chem_group_alignments
=
None
self
.
_chem_group_ref_seqs
=
None
self
.
_chem_group_types
=
None
@property
def
chem_groups
(
self
):
"""
Groups of chemically equivalent poly-peptides/nucleotides in *target*
Seq. id. > 95% is considered equivalent. First chain in group is the one
with longest sequence.
:getter: Computed on first use (cached)
:type: :class:`list` of :class:`list` of :class:`str` (chain names)
"""
if
self
.
_chem_groups
is
None
:
self
.
_chem_groups
=
list
()
for
a
in
self
.
chem_group_alignments
:
self
.
_chem_groups
.
append
([
s
.
GetName
()
for
s
in
a
.
sequences
])
return
self
.
_chem_groups
@property
def
chem_group_alignments
(
self
):
"""
MSA for each group in :attr:`~chem_groups`
Sequences in MSAs exhibit same order as in :attr:`~chem_groups`
:getter: Computed on first use (cached)
:type: :class:`ost.seq.AlignmentList`
"""
if
self
.
_chem_group_alignments
is
None
:
self
.
_chem_group_alignments
,
self
.
_chem_group_types
=
\
_GetChemGroupAlignments
(
self
.
target
)
return
self
.
_chem_group_alignments
@property
def
chem_group_ref_seqs
(
self
):
"""
Reference (longest) sequence for each group in :attr:`~chem_groups`
:getter: Computed on first use (cached)
:type: :class:`ost.seq.SequenceList`
"""
if
self
.
_chem_group_ref_seqs
is
None
:
self
.
_chem_group_ref_seqs
=
seq
.
CreateSequenceList
()
for
a
in
self
.
chem_group_alignments
:
s
=
seq
.
CreateSequence
(
a
.
GetSequence
(
0
).
GetName
(),
a
.
GetSequence
(
0
).
GetGaplessString
())
self
.
_chem_group_ref_seqs
.
AddSequence
(
s
)
return
self
.
_chem_group_ref_seqs
@property
def
chem_group_types
(
self
):
"""
ChemType of each group in :attr:`~chem_groups`
Specifying if groups are poly-peptides/nucleotides, i.e.
:class:`ost.mol.ChemType.AMINOACIDS` or
:class:`ost.mol.ChemType.AMINOACIDS`
:getter: Computed on first use (cached)
:type: :class:`list` of :class:`ost.mol.ChemType`
"""
if
self
.
_chem_group_types
is
None
:
self
.
_chem_group_alignments
,
self
.
_chem_group_types
=
\
_GetChemGroupAlignments
(
self
.
target
)
return
self
.
_chem_group_types
def
GetChemMapping
(
self
,
model
):
"""
Maps sequences in *model* to chem_groups of target
:param model: Model from which to extract sequences, a
selection that only includes peptides and
nucleotides is performed.
:type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
:returns: Tuple with two lists of length `len(self.chem_groups)`:
1) Each element is a :class:`list` with mdl chain names that
map to the chem group at that position.
2) Each element is a :class:`ost.seq.AlignmentList` aligning
these mdl chain sequences to the chem group ref sequences.
"""
mdl
=
model
.
Select
(
"
peptide=true or nucleotide=true
"
)
mdl_pep_seqs
,
mdl_nuc_seqs
=
_GetAtomSeqs
(
mdl
)
mapping
=
[
list
()
for
x
in
self
.
chem_groups
]
alns
=
[
seq
.
AlignmentList
()
for
x
in
self
.
chem_groups
]
for
s
in
mdl_pep_seqs
:
idx
,
aln
=
_MapSequence
(
self
.
chem_group_ref_seqs
,
self
.
chem_group_types
,
s
,
mol
.
ChemType
.
AMINOACIDS
,
95.
,
0.1
,
seq
.
alg
.
BLOSUM100
,
-
5
,
-
2
)
if
idx
is
not
None
:
mapping
[
idx
].
append
(
s
.
GetName
())
alns
[
idx
].
append
(
aln
)
for
s
in
mdl_nuc_seqs
:
idx
,
aln
=
_MapSequence
(
self
.
chem_group_ref_seqs
,
self
.
chem_group_types
,
s
,
mol
.
ChemType
.
NUCLEOTIDES
,
95.
,
0.1
,
seq
.
alg
.
NUC44
,
-
4
,
-
4
)
if
idx
is
not
None
:
mapping
[
idx
].
append
(
s
.
GetName
())
alns
[
idx
].
append
(
aln
)
return
(
mapping
,
alns
)
def
GetNaivelDDTMapping
(
self
,
model
,
bb_only
=
False
,
inclusion_radius
=
15.0
,
thresholds
=
[
0.5
,
1.0
,
2.0
,
4.0
]):
"""
Naively iterates all possible chain mappings and returns the best
Maps *model* chain sequences to :attr:`~chem_groups` and performs all
possible permutations. The best mapping is selected based on lDDT score.
:param model: Model to map
:type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
:param bb_only: lDDT considers only atoms of name
"
CA
"
(peptides) or
"
C3
'"
(nucleotides). Gives speed improvement but
sidechains are not considered anymore.
:type bb_only: :class:`bool`
:param inclusion_radius: Inclusion radius for lDDT
:type inclusion_radius: :class:`float`
:param thresholds: Thresholds for lDDT
:type thresholds: :class:`list` of :class:`float`
:returns: A :class:`list` of :class:`list` that reflects
:attr:`~chem_groups` but is filled with the respective model
chains. Target chains without mapped model chains are set to
None.
"""
mdl
=
model
.
Select
(
"
peptide=true or nucleotide=true
"
)
chem_mapping
,
chem_group_alns
=
self
.
GetChemMapping
(
mdl
)
# check for the simplest case
only_one_to_one
=
True
for
ref_chains
,
mdl_chains
in
zip
(
self
.
chem_groups
,
chem_mapping
):
if
len
(
ref_chains
)
!=
1
or
len
(
mdl_chains
)
not
in
[
0
,
1
]:
only_one_to_one
=
False
break
if
only_one_to_one
:
# skip ref chem groups with no mapped mdl chain
return
[(
a
,
b
)
for
a
,
b
in
zip
(
self
.
chem_groups
,
chem_mapping
)
if
len
(
b
)
==
1
]
# alignment of each model chain to chem_group reference sequence
mdl_alns
=
dict
()
for
alns
in
chem_group_alns
:
for
aln
in
alns
:
mdl_chain_name
=
aln
.
GetSequence
(
1
).
GetName
()
mdl_alns
[
mdl_chain_name
]
=
aln
# Setup scoring
lddt_scorer
=
lddt
.
lDDTScorer
(
self
.
target
,
bb_only
=
bb_only
)
best_mapping
=
None
best_lddt
=
-
1.0
for
mapping
in
_ChainMappings
(
self
.
chem_groups
,
chem_mapping
):
# chain_mapping and alns as input for lDDT computation
lddt_chain_mapping
=
dict
()
lddt_alns
=
dict
()
for
ref_chem_group
,
mdl_chem_group
,
ref_aln
in
\
zip
(
self
.
chem_groups
,
mapping
,
self
.
chem_group_alignments
):
for
ref_ch
,
mdl_ch
in
zip
(
ref_chem_group
,
mdl_chem_group
):
# some mdl chains can be None
if
mdl_ch
is
not
None
:
lddt_chain_mapping
[
mdl_ch
]
=
ref_ch
# obtain alignments of mdl and ref chains towards chem
# group ref sequence and merge them
aln_list
=
seq
.
AlignmentList
()
# do ref aln
s1
=
ref_aln
.
GetSequence
(
0
)
s2
=
ref_aln
.
GetSequence
(
ref_chem_group
.
index
(
ref_ch
))
aln_list
.
append
(
seq
.
CreateAlignment
(
s1
,
s2
))
# do mdl aln
aln_list
.
append
(
mdl_alns
[
mdl_ch
])
# merge
ref_seq
=
seq
.
CreateSequence
(
s1
.
GetName
(),
s1
.
GetGaplessString
())
merged_aln
=
seq
.
alg
.
MergePairwiseAlignments
(
aln_list
,
ref_seq
)
# merged_aln:
# seq1: ref seq of chem group
# seq2: seq of ref chain
# seq3: seq of mdl chain
# => we need the alignment between seq2 and seq3
aln
=
seq
.
CreateAlignment
(
merged_aln
.
GetSequence
(
1
),
merged_aln
.
GetSequence
(
2
))
lddt_alns
[
mdl_ch
]
=
aln
lDDT
,
_
=
lddt_scorer
.
lDDT
(
mdl
,
thresholds
=
thresholds
,
chain_mapping
=
lddt_chain_mapping
,
residue_mapping
=
lddt_alns
,
check_resnames
=
False
)
if
lDDT
>
best_lddt
:
best_mapping
=
mapping
best_lddt
=
lDDT
return
best_mapping
def
_GetChemGroupAlignments
(
ent
,
seqid_thr
=
95.
,
gap_thr
=
0.1
):
"""
Returns alignments with groups of chemically equivalent chains
:param ent: Entity to process
:type ent: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
:param seqid_thr: Threshold used to decide when two chains are identical.
95 percent tolerates the few mutations crystallographers
like to do.
:type seqid_thr: :class:`float`
:param gap_thr: Additional threshold to avoid gappy alignments with high
seqid. The reason for not just normalizing seqid by the
longer sequence is that one sequence might be a perfect
subsequence of the other but only cover half of it. This
threshold checks for a maximum allowed fraction of gaps
in any of the two sequences after stripping terminal gaps.
:type gap_thr: :class:`float`
:returns: Tuple with first element being an AlignmentList. Each alignment
represents a group of chemically equivalent chains and the first
sequence is the longest. Second element is a list of equivalent
length specifying the types of the groups. List elements are in
[:class:`ost.ChemType.AMINOACIDS`,
:class:`ost.ChemType.NUCLEOTIDES`]
:raises: :class:`RuntimeError` if ent contains any residue which doesn
'
t
evaluate True for `r.IsPeptideLinking() or r.IsNucleotideLinking()`
or when these two types are not strictly separated to different
chains.
"""
pep_seqs
,
nuc_seqs
=
_GetAtomSeqs
(
ent
)
pep_groups
=
_GroupSequences
(
pep_seqs
,
seqid_thr
,
gap_thr
,
seq
.
alg
.
BLOSUM100
,
-
5
,
-
2
)
nuc_groups
=
_GroupSequences
(
nuc_seqs
,
seqid_thr
,
gap_thr
,
seq
.
alg
.
NUC44
,
-
4
,
-
4
)
group_types
=
[
mol
.
ChemType
.
AMINOACIDS
]
*
len
(
pep_groups
)
group_types
+=
[
mol
.
ChemType
.
NUCLEOTIDES
]
*
len
(
nuc_groups
)
groups
=
pep_groups
groups
.
extend
(
nuc_groups
)
return
(
groups
,
group_types
)
def
_GetAtomSeqs
(
ent
):
"""
Extracts and returns atomseqs for polypeptide/nucleotide chains in ent
:param ent: Entity for which you want to extract atomseqs
:type ent: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
:returns: Two lists, first containing atomseqs for all polypeptide chains,
the second is the same for polynucleotides
:raises: :class:`RuntimeError` if ent contains any residue which doesn
'
t
evaluate True for `r.IsPeptideLinking() or r.IsNucleotideLinking()`
or when these two types are not strictly separated in different
chains.
"""
polypep_seqs
=
seq
.
CreateSequenceList
()
polynuc_seqs
=
seq
.
CreateSequenceList
()
for
ch
in
ent
.
chains
:
n_res
=
len
(
ch
.
residues
)
n_pep
=
sum
([
r
.
IsPeptideLinking
()
for
r
in
ch
.
residues
])
n_nuc
=
sum
([
r
.
IsNucleotideLinking
()
for
r
in
ch
.
residues
])
# guarantee that we have either pep or nuc (no mix of the two)
if
n_pep
>
0
and
n_nuc
>
0
:
raise
RuntimeError
(
f
"
Must not mix peptide and nucleotide linking
"
f
"
residues in same chain (
{
ch
.
GetName
()
}
)
"
)
if
(
n_pep
+
n_nuc
)
!=
n_res
:
raise
RuntimeError
(
"
All residues must either be peptide_linking
"
"
or nucleotide_linking
"
)
s
=
''
.
join
([
r
.
one_letter_code
for
r
in
ch
.
residues
])
if
n_pep
==
n_res
:
polypep_seqs
.
AddSequence
(
seq
.
CreateSequence
(
ch
.
GetName
(),
s
))
elif
n_nuc
==
n_res
:
polynuc_seqs
.
AddSequence
(
seq
.
CreateSequence
(
ch
.
GetName
(),
s
))
else
:
raise
RuntimeError
(
"
This shouldnt happen
"
)
return
(
polypep_seqs
,
polynuc_seqs
)
def
_GroupSequences
(
seqs
,
seqid_thr
,
gap_thr
,
subst_mat
,
gap_open
,
gap_ext
):
"""
Get list of alignments representing groups of equivalent sequences
:param seqid_thr: Threshold used to decide when two chains are identical.
95 percent tolerates the few mutations crystallographers
like to do.
:type seqid_thr: :class:`float`
:param gap_thr: Additional threshold to avoid gappy alignments with high
seqid. The reason for not just normalizing seqid by the
longer sequence is that one sequence might be a perfect
subsequence of the other but only cover half of it. This
threshold checks for a maximum allowed fraction of gaps
in any of the two sequences after stripping terminal gaps.
:type gap_thr: :class:`float`
:param subst_mat: Substitution matrix for standard Needleman-Wunsch
:type subst_mat: :class:`ost.seq.alg.SubstWeightMatrix`
:param gap_open: Gap open penalty for standard Needleman-Wunsch
:type gap_open: :class:`int`
:param gap_ext: Gap extension penalty for standard Needleman-Wunsch
:type gap_ext: :class:`int`
:returns: A list of alignments, one alignment for each group
with longest sequence (reference) as first sequence.
:rtype: :class:`ost.seq.AlignmentList`
"""
sim_seqs
=
list
()
aln_cache
=
dict
()
# cache alignments to generate an MSA for each group
# at the very end
for
i
,
j
in
itertools
.
combinations
(
range
(
len
(
seqs
)),
2
):
aln
,
seqid
,
gap_frac_i
,
gap_frac_j
=
_Align
(
seqs
[
i
],
seqs
[
j
],
subst_mat
,
gap_open
,
gap_ext
)
if
seqid
>=
seqid_thr
and
gap_frac_i
<
gap_thr
and
gap_frac_j
<
gap_thr
:
sim_seqs
.
append
((
i
,
j
))
aln_cache
[(
i
,
j
)]
=
aln
# trivial case: no matching pairs
if
len
(
sim_seqs
)
==
0
:
aln_list
=
seq
.
AlignmentList
()
for
s
in
seqs
:
aln_list
.
append
(
seq
.
CreateAlignment
(
s
))
return
aln_list
# merge transitive pairs
groups
=
list
()
for
i
,
j
in
sim_seqs
:
found
=
False
for
g
in
groups
:
if
i
in
g
or
j
in
g
:
found
=
True
g
.
add
(
i
)
g
.
add
(
j
)
if
not
found
:
groups
.
append
(
set
([
i
,
j
]))
# sort based on sequence length
sorted_groups
=
list
()
for
g
in
groups
:
tmp
=
sorted
([[
len
(
seqs
[
i
]),
i
]
for
i
in
g
],
reverse
=
True
)
sorted_groups
.
append
([
x
[
1
]
for
x
in
tmp
])
# add all single chains
for
i
in
range
(
len
(
seqs
)):
if
not
any
(
i
in
g
for
g
in
sorted_groups
):
sorted_groups
.
append
([
i
])
# translate from indices back to sequences and directly generate alignments
# of the groups with the longest (first) sequence as reference
aln_list
=
seq
.
AlignmentList
()
for
g
in
sorted_groups
:
if
len
(
g
)
==
1
:
# aln with one single sequence
aln_list
.
append
(
seq
.
CreateAlignment
(
seqs
[
g
[
0
]]))
else
:
# obtain pairwise aln of first sequence (reference) to all others
alns
=
seq
.
AlignmentList
()
i
=
g
[
0
]
for
j
in
g
[
1
:]:
if
(
i
,
j
)
in
aln_cache
:
alns
.
append
(
aln_cache
[(
i
,
j
)])
else
:
# need to revert
aln
=
aln_cache
[(
j
,
i
)]
alns
.
append
(
seq
.
CreateAlignment
(
aln
.
GetSequence
(
1
),
aln
.
GetSequence
(
0
)))
# and merge
aln_list
.
append
(
seq
.
alg
.
MergePairwiseAlignments
(
alns
,
seqs
[
i
]))
return
aln_list
def
_MapSequence
(
ref_seqs
,
ref_types
,
s
,
s_type
,
seqid_thr
,
gap_thr
,
subst_mat
,
gap_open
,
gap_ext
):
"""
Tries top map *s* onto any of the sequences in *ref_seqs* of equal type
:param ref_seqs: Reference sequences
:type ref_seqs: :class:`ost.seq.SequenceList`
:param ref_types: Types of reference sequences, e.g.
ost.mol.ChemType.AminoAcids
:type ref_types: :class:`list` of :class:`ost.mol.ChemType`
:param s: Sequence to map
:type s: :class:`ost.seq.SequenceHandle`
:param s_type: Type of *s*, only try mapping to sequences in *ref_seqs*
with equal type as defined in *ref_types*
:param seqid_thr: Threshold used to decide whether sequence is mapped.
:type seqid_thr: :class:`float`
:param gap_thr: Additional threshold to avoid gappy alignments with high
seqid. The reason for not just normalizing seqid by the
longer sequence is that one sequence might be a perfect
subsequence of the other but only cover half of it. This
threshold checks for a maximum allowed fraction of gaps
in any of the two sequences after stripping terminal gaps.
:type gap_thr: :class:`float`
:param subst_mat: Substitution matrix for standard Needleman-Wunsch
:type subst_mat: :class:`ost.seq.alg.SubstWeightMatrix`
:param gap_open: Gap open penalty for standard Needleman-Wunsch
:type gap_open: :class:`int`
:param gap_ext: Gap extension penalty for standard Needleman-Wunsch
:type gap_ext: :class:`int`
:returns: Tuple with two elements. 1) index of sequence in *ref_seqs* to
which *s* can be mapped 2) Pairwise sequence alignment with
sequence from *ref_seqs* as first sequence. Both elements are
None if no mapping can be found.
:raises: :class:`RuntimeError` if mapping is ambiguous, i.e. *s*
successfully maps to more than one sequence in *ref_seqs*
"""
map_idx
=
None
map_aln
=
None
for
ref_idx
,
ref_seq
in
enumerate
(
ref_seqs
):
aln
,
seqid
,
gap_frac_i
,
gap_frac_j
=
_Align
(
ref_seq
,
s
,
subst_mat
,
gap_open
,
gap_ext
)
if
seqid
>=
seqid_thr
and
gap_frac_i
<
gap_thr
and
gap_frac_j
<
gap_thr
:
if
map_idx
is
not
None
:
# match is ambiguous!
raise
RuntimeError
(
f
"
Ambiguous mapping for mdl chain
"
f
"
{
s
.
GetName
()
}
. Maps to chemically
"
f
"
equivalent groups with ref sequences
"
f
"
{
ref_seqs
[
map_idx
].
GetName
()
}
and
"
f
"
{
ref_seqs
[
ref_idx
].
GetName
()
}
.
"
)
map_idx
=
ref_idx
map_aln
=
aln
return
(
map_idx
,
map_aln
)
def
_Align
(
s1
,
s2
,
subst_mat
,
gap_open
,
gap_ext
):
"""
Returns alignment and some distance metrics of that alignment
:param s1: First sequence to align
:type s1: :class:`ost.seq.SequenceHandle`
:param s2: Second sequence to align
:type s2: :class:`ost.seq.SequenceHandle`
:param subst_mat: Substitution matrix for standard Needleman-Wunsch
:type subst_mat: :class:`ost.seq.alg.SubstWeightMatrix`
:param gap_open: Gap open penalty for standard Needleman-Wunsch
:type gap_open: :class:`int`
:param gap_open: Gap extension penalty for standard Needleman-Wunsch
:type gap_open: :class:`int`
:returns: Tuple with 4 elements. 1) aln with s1 as first and s2 as
second sequence 2) sequence identify in range [0, 100]
considering aligned columns 3) Fraction of gaps between
first and last aligned column in s1 4) same for s2.
"""
aln
=
seq
.
alg
.
GlobalAlign
(
s1
,
s2
,
subst_mat
,
gap_open
=
gap_open
,
gap_ext
=
gap_ext
)[
0
]
seqid
=
seq
.
alg
.
SequenceIdentity
(
aln
)
n_gaps_1
=
str
(
aln
.
GetSequence
(
0
)).
strip
(
'
-
'
).
count
(
'
-
'
)
n_gaps_2
=
str
(
aln
.
GetSequence
(
1
)).
strip
(
'
-
'
).
count
(
'
-
'
)
gap_frac_1
=
float
(
n_gaps_1
)
/
len
(
aln
.
GetSequence
(
0
).
GetGaplessString
())
gap_frac_2
=
float
(
n_gaps_2
)
/
len
(
aln
.
GetSequence
(
1
).
GetGaplessString
())
return
(
aln
,
seqid
,
gap_frac_1
,
gap_frac_2
)
def
_RefSmallerGenerator
(
ref_chains
,
mdl_chains
):
"""
Returns all possible ways to map mdl_chains onto ref_chains
Specific for the case where len(ref_chains) < len(mdl_chains)
"""
for
c
in
itertools
.
combinations
(
mdl_chains
,
len
(
ref_chains
)):
for
p
in
itertools
.
permutations
(
c
):
yield
p
def
_RefLargerGenerator
(
ref_chains
,
mdl_chains
):
"""
Returns all possible ways to map mdl_chains onto ref_chains
Specific for the case where len(ref_chains) > len(mdl_chains)
Ref chains without mapped mdl chain are assigned None
"""
n_ref
=
len
(
ref_chains
)
n_mdl
=
len
(
mdl_chains
)
for
c
in
itertools
.
combinations
(
range
(
n_ref
),
n_mdl
):
for
p
in
itertools
.
permutations
(
mdl_chains
):
ret_list
=
[
None
]
*
n_ref
for
idx
,
ch
in
zip
(
c
,
p
):
ret_list
[
idx
]
=
ch
yield
ret_list
def
_ChainMappings
(
ref_chains
,
mdl_chains
):
"""
Returns all possible ways to map *mdl_chains* onto fixed *ref_chains*
:param ref_chains: List of list of chemically equivalent chains in reference
:type ref_chains: :class:`list` of :class:`list`
:param mdl_chains: Equally long list of list of chemically equivalent chains
in model that map on those ref chains.
:type mdl_chains: :class:`list` of :class:`list`
:returns: Iterator over all possible mappings of *mdl_chains* onto fixed
*ref_chains*. Potentially contains None as padding when number of
model chains for a certain mapping is smaller than the according
reference chains.
Example: _ChainMappings([[
'
A
'
,
'
B
'
,
'
C
'
], [
'
D
'
,
'
E
'
]],
[[
'
x
'
,
'
y
'
], [
'
i
'
,
'
j
'
]])
gives an iterator over: [([
'
x
'
,
'
y
'
, None], (
'
i
'
,
'
j
'
)),
([
'
x
'
,
'
y
'
, None], (
'
j
'
,
'
i
'
)),
([
'
y
'
,
'
x
'
, None], (
'
i
'
,
'
j
'
)),
([
'
y
'
,
'
x
'
, None], (
'
j
'
,
'
i
'
)),
([
'
x
'
, None,
'
y
'
], (
'
i
'
,
'
j
'
)),
([
'
x
'
, None,
'
y
'
], (
'
j
'
,
'
i
'
)),
([
'
y
'
, None,
'
x
'
], (
'
i
'
,
'
j
'
)),
([
'
y
'
, None,
'
x
'
], (
'
j
'
,
'
i
'
)),
([None,
'
x
'
,
'
y
'
], (
'
i
'
,
'
j
'
)),
([None,
'
x
'
,
'
y
'
], (
'
j
'
,
'
i
'
)),
([None,
'
y
'
,
'
x
'
], (
'
i
'
,
'
j
'
)),
([None,
'
y
'
,
'
x
'
], (
'
j
'
,
'
i
'
))]
"""
assert
(
len
(
ref_chains
)
==
len
(
mdl_chains
))
# one iterator per mapping representing all mdl combinations relative to
# reference
iterators
=
list
()
for
ref
,
mdl
in
zip
(
ref_chains
,
mdl_chains
):
if
len
(
ref
)
==
len
(
mdl
):
iterators
.
append
(
itertools
.
permutations
(
mdl
))
elif
len
(
ref
)
<
len
(
mdl
):
iterators
.
append
(
_RefSmallerGenerator
(
ref
,
mdl
))
else
:
iterators
.
append
(
_RefLargerGenerator
(
ref
,
mdl
))
return
itertools
.
product
(
*
iterators
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment