Skip to content
Snippets Groups Projects
Commit 72f09e40 authored by B13nch3n
Browse files

Test for missing items

parent 112d00ed
Branches
No related tags found
No related merge requests found
...@@ -39,7 +39,7 @@ def _parse_args(): ...@@ -39,7 +39,7 @@ def _parse_args():
"--test-file", "--test-file",
default=None, default=None,
action="store", action="store",
help="Only run test for this file.", help="Only run test for this ModelCIF/ mmCIF file.",
) )
parser.add_argument( parser.add_argument(
"-l", "-l",
...@@ -331,9 +331,32 @@ def _main(): ...@@ -331,9 +331,32 @@ def _main():
"status": "aborted", "status": "aborted",
"diagnosis": [], "diagnosis": [],
}, },
# missing items, key & mandatory items missing
"missing_items.cif": {
"ret_val": 2,
"errors": [],
"cifcheck-errors": [],
"status": "completed",
"diagnosis": [
'ERROR - In block "Q9Y5J9-Q9Y5L4_UNRELAXED_RANK_1_MODEL_5", '
+ 'key item "entity_id" not found in category '
+ '"entity_poly_seq"',
'ERROR - In block "Q9Y5J9-Q9Y5L4_UNRELAXED_RANK_1_MODEL_5", '
+ 'mandatory item "entity_id" is not in category '
+ '"entity_poly_seq"',
'ERROR - In block "Q9Y5J9-Q9Y5L4_UNRELAXED_RANK_1_MODEL_5", '
+ 'key item "entity_id" not found in category '
+ '"pdbx_poly_seq_scheme"',
'ERROR - In block "Q9Y5J9-Q9Y5L4_UNRELAXED_RANK_1_MODEL_5", '
+ 'mandatory item "entity_id" is not in category '
+ '"pdbx_poly_seq_scheme"',
'ERROR - In block "Q9Y5J9-Q9Y5L4_UNRELAXED_RANK_1_MODEL_5", '
+ 'mandatory item "label_entity_id" is not in category '
+ '"atom_site"',
],
},
# duplicated item in _loop category # duplicated item in _loop category
# missing category (entity) # missing category (entity)
# missing item (struct_ref.db_name)
# parent-child relationship issue (remove an atom_type.symbol) # parent-child relationship issue (remove an atom_type.symbol)
} }
......
data_Q9Y5J9-Q9Y5L4_UNRELAXED_RANK_1_MODEL_5
_entry.id Q9Y5J9-Q9Y5L4_UNRELAXED_RANK_1_MODEL_5
_struct.entry_id Q9Y5J9-Q9Y5L4_UNRELAXED_RANK_1_MODEL_5
_struct.pdbx_model_details 'Dimer model generated for TIMM8B and TIMM13, produced using AlphaFold-Multimer (AlphaFold v2.2.0) as implemented by ColabFold (v1.2.0) which uses MMseqs2 for MSA generation (UniRef30 + Environmental).'
_struct.pdbx_structure_determination_methodology computational
_struct.title 'Predicted interaction between TIMM8B and TIMM13'
_audit_conform.dict_location https://raw.githubusercontent.com/ihmwg/ModelCIF/557bda7/base/mmcif_ma-core.dic
_audit_conform.dict_name mmcif_ma.dic
_audit_conform.dict_version 1.4.1
#
loop_
_citation.id
_citation.title
_citation.journal_abbrev
_citation.journal_volume
_citation.page_first
_citation.page_last
_citation.year
_citation.pdbx_database_id_PubMed
_citation.pdbx_database_id_DOI
1 'ColabFold: making protein folding accessible to all.' 'Nature Methods' 19 679
682 2022 35637307 10.1038/s41592-022-01488-1
2
'MMseqs2 desktop and local web server app for fast, interactive sequence searches.'
Bioinformatics 35 2856 2858 2019 30615063 10.1093/bioinformatics/bty1057
3 'Protein complex prediction with AlphaFold-Multimer.' bioRxiv . . . 2021 .
10.1101/2021.10.04.463034
#
#
loop_
_citation_author.citation_id
_citation_author.name
_citation_author.ordinal
1 'Mirdita, M.' 1
1 'Schuetze, K.' 2
1 'Moriwaki, Y.' 3
1 'Heo, L.' 4
1 'Ovchinnikov, S.' 5
1 'Steinegger, M.' 6
2 'Mirdita, M.' 7
2 'Steinegger, M.' 8
2 'Soeding, J.' 9
3 'Evans, R.' 10
3 "O'Neill, M." 11
3 'Pritzel, A.' 12
3 'Antropova, N.' 13
3 'Senior, A.' 14
3 'Green, T.' 15
3 'Zidek, A.' 16
3 'Bates, R.' 17
3 'Blackwell, S.' 18
3 'Yim, J.' 19
3 'Ronneberger, O.' 20
3 'Bodenstein, S.' 21
3 'Zielinski, M.' 22
3 'Bridgland, A.' 23
3 'Potapenko, A.' 24
3 'Cowie, A.' 25
3 'Tunyasuvunakool, K.' 26
3 'Jain, R.' 27
3 'Clancy, E.' 28
3 'Kohli, P.' 29
3 'Jumper, J.' 30
3 'Hassabis, D.' 31
#
#
loop_
_software.pdbx_ordinal
_software.name
_software.classification
_software.description
_software.version
_software.type
_software.location
_software.citation_id
1 ColabFold 'model building' 'Structure prediction' 1.2.0 package
https://github.com/sokrypton/ColabFold 1
2 MMseqs2 'data collection' 'Many-against-Many sequence searching' . package
https://github.com/soedinglab/mmseqs2 2
3 AlphaFold-Multimer 'model building' 'Structure prediction' . package
https://github.com/deepmind/alphafold 3
#
#
loop_
_ma_software_parameter.parameter_id
_ma_software_parameter.group_id
_ma_software_parameter.data_type
_ma_software_parameter.name
_ma_software_parameter.value
_ma_software_parameter.description
1 1 boolean use_templates NO .
2 1 boolean use_amber NO .
3 1 string msa_mode 'MMseqs2 (UniRef+Environmental)' .
4 1 string model_type AlphaFold2-multimer-v2 .
5 1 integer num_models 5 .
6 1 integer num_recycles 3 .
7 1 integer-csv model_order 3,4,5,1,2 .
8 1 boolean keep_existing_results YES .
9 1 string rank_by multimer .
10 1 string pair_mode unpaired+paired .
11 1 string host_url https://api.colabfold.com .
12 1 integer 'stop_at_score' 100 .
13 1 float recompile_padding 1.100 .
14 1 boolean recompile_all_models YES .
15 1 string commit b532e910b15434f707f0b7460abc25c70fcb9b26 .
16 1 string version 1.2.0 .
#
#
loop_
_ma_software_group.ordinal_id
_ma_software_group.group_id
_ma_software_group.software_id
_ma_software_group.parameter_group_id
1 1 1 1
2 1 2 1
3 1 3 1
#
#
loop_
_audit_author.name
_audit_author.pdbx_ordinal
'Bartolec, T.K.' 1
'Vazquez-Campos, X.' 2
'Norman, A.' 3
'Luong, C.' 4
'Payne, R.J.' 5
'Wilkins, M.R.' 6
'Mackay, J.P.' 7
'Low, J.K.K.' 8
#
#
loop_
_chem_comp.id
_chem_comp.type
_chem_comp.name
_chem_comp.formula
_chem_comp.formula_weight
_chem_comp.ma_provenance
ALA 'L-peptide linking' ALANINE 'C3 H7 N O2' 89.094 'CCD Core'
ARG 'L-peptide linking' ARGININE 'C6 H15 N4 O2 1' 175.212 'CCD Core'
ASN 'L-peptide linking' ASPARAGINE 'C4 H8 N2 O3' 132.119 'CCD Core'
ASP 'L-peptide linking' 'ASPARTIC ACID' 'C4 H7 N O4' 133.103 'CCD Core'
CYS 'L-peptide linking' CYSTEINE 'C3 H7 N O2 S' 121.154 'CCD Core'
GLN 'L-peptide linking' GLUTAMINE 'C5 H10 N2 O3' 146.146 'CCD Core'
GLU 'L-peptide linking' 'GLUTAMIC ACID' 'C5 H9 N O4' 147.130 'CCD Core'
GLY 'peptide linking' GLYCINE 'C2 H5 N O2' 75.067 'CCD Core'
HIS 'L-peptide linking' HISTIDINE 'C6 H10 N3 O2 1' 156.165 'CCD Core'
ILE 'L-peptide linking' ISOLEUCINE 'C6 H13 N O2' 131.175 'CCD Core'
LEU 'L-peptide linking' LEUCINE 'C6 H13 N O2' 131.175 'CCD Core'
LYS 'L-peptide linking' LYSINE 'C6 H15 N2 O2 1' 147.198 'CCD Core'
MET 'L-peptide linking' METHIONINE 'C5 H11 N O2 S' 149.208 'CCD Core'
PHE 'L-peptide linking' PHENYLALANINE 'C9 H11 N O2' 165.192 'CCD Core'
PRO 'L-peptide linking' PROLINE 'C5 H9 N O2' 115.132 'CCD Core'
SER 'L-peptide linking' SERINE 'C3 H7 N O3' 105.093 'CCD Core'
THR 'L-peptide linking' THREONINE 'C4 H9 N O3' 119.120 'CCD Core'
TRP 'L-peptide linking' TRYPTOPHAN 'C11 H12 N2 O2' 204.229 'CCD Core'
TYR 'L-peptide linking' TYROSINE 'C9 H11 N O3' 181.191 'CCD Core'
VAL 'L-peptide linking' VALINE 'C5 H11 N O2' 117.148 'CCD Core'
#
#
loop_
_entity.id
_entity.type
_entity.src_method
_entity.pdbx_description
_entity.formula_weight
_entity.pdbx_number_of_molecules
_entity.details
1 polymer nat 'Homo sapiens (Human) TIMM8B (Q9Y5J9)' 10831.880 1 .
2 polymer nat 'Homo sapiens (Human) TIMM13 (Q9Y5L4)' 12206.562 1 .
#
#
loop_
_entity_src_nat.entity_id
_entity_src_nat.pdbx_src_id
_entity_src_nat.pdbx_ncbi_taxonomy_id
_entity_src_nat.pdbx_organism_scientific
_entity_src_nat.common_name
_entity_src_nat.strain
1 1 9606 'Homo sapiens (Human)' . .
2 1 9606 'Homo sapiens (Human)' . .
#
#
loop_
_ma_target_ref_db_details.target_entity_id
_ma_target_ref_db_details.db_name
_ma_target_ref_db_details.db_name_other_details
_ma_target_ref_db_details.db_code
_ma_target_ref_db_details.db_accession
_ma_target_ref_db_details.seq_db_isoform
_ma_target_ref_db_details.seq_db_align_begin
_ma_target_ref_db_details.seq_db_align_end
_ma_target_ref_db_details.ncbi_taxonomy_id
_ma_target_ref_db_details.organism_scientific
_ma_target_ref_db_details.seq_db_sequence_version_date
_ma_target_ref_db_details.seq_db_sequence_checksum
1 UNP . TIM8B_HUMAN Q9Y5J9 . 1 83 9606 'Homo sapiens (Human)' 1999-11-01
9DC47BB475DB8692
2 UNP . TIM13_HUMAN Q9Y5L4 . 1 95 9606 'Homo sapiens (Human)' 1999-11-01
E40E742C7CA55834
#
#
loop_
_entity_poly.entity_id
_entity_poly.pdbx_seq_one_letter_code
_entity_poly.pdbx_seq_one_letter_code_can
1 MAE MAE
2 MEG MEG
#
#
loop_
_entity_poly_seq.num
_entity_poly_seq.mon_id
_entity_poly_seq.hetero
1 MET .
2 ALA .
3 GLU .
1 MET .
2 GLU .
3 GLY .
#
#
loop_
_struct_asym.id
_struct_asym.entity_id
_struct_asym.details
A 1 .
B 2 .
#
#
loop_
_pdbx_poly_seq_scheme.asym_id
_pdbx_poly_seq_scheme.seq_id
_pdbx_poly_seq_scheme.mon_id
_pdbx_poly_seq_scheme.pdb_seq_num
_pdbx_poly_seq_scheme.auth_seq_num
_pdbx_poly_seq_scheme.pdb_mon_id
_pdbx_poly_seq_scheme.auth_mon_id
_pdbx_poly_seq_scheme.pdb_strand_id
_pdbx_poly_seq_scheme.pdb_ins_code
A 1 MET 1 1 MET MET A .
A 2 ALA 2 2 ALA ALA A .
A 3 GLU 3 3 GLU GLU A .
B 1 MET 1 1 MET MET B .
B 2 GLU 2 2 GLU GLU B .
B 3 GLY 3 3 GLY GLY B .
#
#
loop_
_ma_data.id
_ma_data.name
_ma_data.content_type
_ma_data.content_type_other_details
1 'Homo sapiens (Human) TIMM8B (Q9Y5J9)' target .
2 'Homo sapiens (Human) TIMM13 (Q9Y5L4)' target .
3 'Model 5 (top ranked model)' 'model coordinates' .
4 UniRef30 'reference database' .
5 'ColabFold DB' 'reference database' .
#
#
loop_
_ma_data_group.ordinal_id
_ma_data_group.group_id
_ma_data_group.data_id
1 1 1
2 1 2
3 1 4
4 1 5
5 2 3
#
#
loop_
_ma_data_ref_db.data_id
_ma_data_ref_db.name
_ma_data_ref_db.location_url
_ma_data_ref_db.version
_ma_data_ref_db.release_date
4 UniRef30 http://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2103.tar.gz
2021_03 .
5 'ColabFold DB'
http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz 2021_08
.
#
#
loop_
_ma_target_entity.entity_id
_ma_target_entity.data_id
_ma_target_entity.origin
1 1 'reference database'
2 2 'reference database'
#
#
loop_
_ma_target_entity_instance.asym_id
_ma_target_entity_instance.entity_id
_ma_target_entity_instance.details
A 1 .
B 2 .
#
#
loop_
_ma_protocol_step.ordinal_id
_ma_protocol_step.protocol_id
_ma_protocol_step.step_id
_ma_protocol_step.method_type
_ma_protocol_step.step_name
_ma_protocol_step.details
_ma_protocol_step.software_group_id
_ma_protocol_step.input_data_group_id
_ma_protocol_step.output_data_group_id
1 1 1 modeling .
'Model generated using ColabFold v1.2.0 with AlphaFold-Multimer (v2) producing 5 models with 3 recycles each, without model relaxation, without templates, ranked by ipTM*0.8+pTM*0.2, starting from paired and unpaired MSAs from MMseqs2 (UniRef+Environmental).'
1 1 2
2 1 2 'model selection' .
'Select best model, which is either the top-ranked model as determined by the ColabFold pipeline (ipTM*0.8+pTM*0.2), or else the model with best congruence with crosslinks reported in the related study.'
. 2 2
#
#
loop_
_ma_model_list.ordinal_id
_ma_model_list.model_id
_ma_model_list.model_group_id
_ma_model_list.model_name
_ma_model_list.model_group_name
_ma_model_list.data_id
_ma_model_list.model_type
_ma_model_list.model_type_other_details
1 1 1 'Model 5 (top ranked model)'
'Crosslinked Heterodimer AlphaFold-Multimer v2 Models' 3 'Ab initio model' .
#
#
loop_
_atom_site.group_PDB
_atom_site.id
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.label_comp_id
_atom_site.label_seq_id
_atom_site.auth_seq_id
_atom_site.pdbx_PDB_ins_code
_atom_site.label_asym_id
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.auth_asym_id
_atom_site.B_iso_or_equiv
_atom_site.pdbx_PDB_model_num
ATOM 1 N N . MET 1 1 ? A 8.317 39.011 19.688 1.000 A 42.340 1
ATOM 2 C CA . MET 1 1 ? A 8.849 37.725 19.245 1.000 A 42.340 1
ATOM 3 C C . MET 1 1 ? A 7.739 36.684 19.142 1.000 A 42.340 1
ATOM 4 O O . MET 1 1 ? A 7.973 35.565 18.682 1.000 A 42.340 1
ATOM 5 C CB . MET 1 1 ? A 9.938 37.236 20.200 1.000 A 42.340 1
ATOM 6 C CG . MET 1 1 ? A 11.341 37.665 19.802 1.000 A 42.340 1
ATOM 7 S SD . MET 1 1 ? A 12.633 36.531 20.445 1.000 A 42.340 1
ATOM 8 C CE . MET 1 1 ? A 13.720 37.723 21.276 1.000 A 42.340 1
ATOM 9 N N . ALA 2 2 ? A 6.663 36.923 19.908 1.000 A 50.210 1
ATOM 10 C CA . ALA 2 2 ? A 5.472 36.082 19.989 1.000 A 50.210 1
ATOM 11 C C . ALA 2 2 ? A 4.632 36.197 18.719 1.000 A 50.210 1
ATOM 12 O O . ALA 2 2 ? A 3.979 35.234 18.311 1.000 A 50.210 1
ATOM 13 C CB . ALA 2 2 ? A 4.637 36.457 21.211 1.000 A 50.210 1
ATOM 14 N N . GLU 3 3 ? A 4.548 37.401 18.095 1.000 A 53.120 1
ATOM 15 C CA . GLU 3 3 ? A 3.660 37.590 16.952 1.000 A 53.120 1
ATOM 16 C C . GLU 3 3 ? A 4.168 36.832 15.728 1.000 A 53.120 1
ATOM 17 O O . GLU 3 3 ? A 3.379 36.423 14.874 1.000 A 53.120 1
ATOM 18 C CB . GLU 3 3 ? A 3.513 39.078 16.625 1.000 A 53.120 1
ATOM 19 C CG . GLU 3 3 ? A 2.318 39.739 17.296 1.000 A 53.120 1
ATOM 20 C CD . GLU 3 3 ? A 2.060 41.155 16.803 1.000 A 53.120 1
ATOM 21 O OE1 . GLU 3 3 ? A 1.079 41.369 16.056 1.000 A 53.120 1
ATOM 22 O OE2 . GLU 3 3 ? A 2.848 42.057 17.167 1.000 A 53.120 1
ATOM 652 N N . MET 1 1 ? B 50.040 32.393 35.390 1.000 B 28.570 1
ATOM 653 C CA . MET 1 1 ? B 49.521 31.790 36.614 1.000 B 28.570 1
ATOM 654 C C . MET 1 1 ? B 48.376 32.619 37.186 1.000 B 28.570 1
ATOM 655 O O . MET 1 1 ? B 47.433 32.071 37.759 1.000 B 28.570 1
ATOM 656 C CB . MET 1 1 ? B 50.632 31.645 37.655 1.000 B 28.570 1
ATOM 657 C CG . MET 1 1 ? B 50.733 30.251 38.253 1.000 B 28.570 1
ATOM 658 S SD . MET 1 1 ? B 52.198 30.058 39.341 1.000 B 28.570 1
ATOM 659 C CE . MET 1 1 ? B 51.684 28.617 40.317 1.000 B 28.570 1
ATOM 660 N N . GLU 2 2 ? B 48.540 33.870 37.053 1.000 B 35.420 1
ATOM 661 C CA . GLU 2 2 ? B 47.501 34.894 37.106 1.000 B 35.420 1
ATOM 662 C C . GLU 2 2 ? B 46.554 34.783 35.915 1.000 B 35.420 1
ATOM 663 O O . GLU 2 2 ? B 45.932 35.769 35.515 1.000 B 35.420 1
ATOM 664 C CB . GLU 2 2 ? B 48.124 36.291 37.154 1.000 B 35.420 1
ATOM 665 C CG . GLU 2 2 ? B 47.783 37.074 38.414 1.000 B 35.420 1
ATOM 666 C CD . GLU 2 2 ? B 48.552 38.380 38.535 1.000 B 35.420 1
ATOM 667 O OE1 . GLU 2 2 ? B 47.992 39.448 38.199 1.000 B 35.420 1
ATOM 668 O OE2 . GLU 2 2 ? B 49.725 38.335 38.969 1.000 B 35.420 1
ATOM 669 N N . GLY 3 3 ? B 46.387 33.521 35.322 1.000 B 37.540 1
ATOM 670 C CA . GLY 3 3 ? B 45.732 33.310 34.041 1.000 B 37.540 1
ATOM 671 C C . GLY 3 3 ? B 44.283 33.761 34.030 1.000 B 37.540 1
ATOM 672 O O . GLY 3 3 ? B 43.519 33.433 34.939 1.000 B 37.540 1
#
#
loop_
_atom_type.symbol
C
N
O
S
#
...@@ -776,7 +776,9 @@ class _CifCheck: ...@@ -776,7 +776,9 @@ class _CifCheck:
# missing items # missing items
for pttrn in [ for pttrn in [
r"^ERROR - In block \"(?P<dblock>.*)\", mandatory " r"^ERROR - In block \"(?P<dblock>.*)\", mandatory "
+ r"item \"(?P<itm>.*)\" is not in category \"(?P<cat>.*)\"$" + r"item \"(?P<itm>.*)\" is not in category \"(?P<cat>.*)\"$",
r"ERROR - In block \"(?P<dblock>.*)\", key item "
+ r"\"(?P<itm>.*)\" not found in category \"(?P<cat>.*)\"$",
]: ]:
match = re.match(pttrn, line) match = re.match(pttrn, line)
if match is not None: if match is not None:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment