Skip to content
Snippets Groups Projects
Commit 733a3323 authored by Bienchen's avatar Bienchen
Browse files

Add AlphaFold DBs

parent 07cf74e2
Branches
No related tags found
No related merge requests found
......@@ -6,6 +6,7 @@
from timeit import default_timer as timer
import argparse
import datetime
import gzip
import os
import shutil
......@@ -494,9 +495,45 @@ def _get_sequence_dbs_colabfold(seq_dbs):
return [db_dict[seq_db] for seq_db in seq_dbs]
def _get_sequence_dbs_alphafold(seq_dbs):
    """Look up the ModelCIF reference database entries used by AlphaFold.

    One ``modelcif.ReferenceDatabase`` is created for each database this
    function knows about (MGnify, UniRef90, BFD, Uniclust30). The entries
    named in ``seq_dbs`` are returned as a list, in the order requested.
    Asking for a name outside that set raises ``KeyError``.
    """
    # Download location shared by three of the four databases.
    gcs_casp14 = (
        "https://storage.googleapis.com/alphafold-databases/casp14_versions/"
    )

    mgnify = modelcif.ReferenceDatabase(
        "MGnify",
        gcs_casp14 + "mgy_clusters_2018_12.fa.gz",
        version="2018_12",
        release_date=datetime.datetime(2018, 12, 6),
    )
    uniref90 = modelcif.ReferenceDatabase(
        "UniRef90",
        "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/"
        "uniref90.fasta.gz",
        version=None,
        release_date=None,
    )
    # No release date is passed for BFD; its version field holds a hex digest.
    bfd = modelcif.ReferenceDatabase(
        "BFD",
        gcs_casp14
        + "bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz",
        version="6a634dc6eb105c2e9b4cba7bbae93412",
    )
    uniclust30 = modelcif.ReferenceDatabase(
        "Uniclust30",
        gcs_casp14 + "uniclust30_2018_08_hhsuite.tar.gz",
        version="2018_08",
        release_date=None,
    )

    known_dbs = {
        "MGnify": mgnify,
        "UniRef90": uniref90,
        "BFD": bfd,
        "Uniclust30": uniclust30,
    }

    selected = []
    for db_name in seq_dbs:
        selected.append(known_dbs[db_name])
    return selected
def _get_modelcif_protocol_data(data_labels, target_entities, model, msa):
"""Assemble data for a ModelCIF protocol step."""
data = modelcif.data.DataGroup()
for label in data_labels:
if label == "target_sequences":
data.extend(target_entities)
......@@ -506,6 +543,12 @@ def _get_modelcif_protocol_data(data_labels, target_entities, model, msa):
data.extend(
_get_sequence_dbs_colabfold(["UniRef", "Environmental"])
)
elif label == "alphafold_reference_dbs":
data.extend(
_get_sequence_dbs_alphafold(
["MGnify", "UniRef90", "BFD", "Uniclust30"]
)
)
elif label == "msas":
data.append(msa)
else:
......@@ -684,7 +727,6 @@ def _get_protocol_steps_and_software_colabfold(config_data):
protocol = []
# MSA step
# Step 1 - MSA: Using default Colabfold databases with default parameters (colabfold_envdb_202108, uniref30_2202)
step = {
"method_type": "coevolution MSA",
"name": None,
......@@ -738,30 +780,50 @@ def _get_config_colabfold():
def _get_config_alphafold():
    """Get config variables for AlphaFold.

    Returns:
        dict: Fixed settings describing the AlphaFold run, with keys

        * ``"af2_version"`` - AlphaFold version string,
        * ``"description"`` - one-line summary of the prediction; kept
          because the modelling step may still read it,
        * ``"msa_description"`` - text for the MSA generation step,
        * ``"mdl_description"`` - text for the modelling step.
    """
    af2_version = "2.2.0"
    description = "Predict model coordinates using AlphaFold."
    msa_description = (
        "MSAs created for corresponding target sequence with AlphaFold using "
        + "default parameters."
    )
    # The version appears twice so the text stays in sync with af2_version.
    mdl_description = (
        f"Model generated using AlphaFold ({af2_version} with default "
        + "parameters) producing 5 models, ranked by pLDDT, starting from the "
        + f"AlphaFold {af2_version} produced MSA."
    )
    # Single exit point: an earlier return here would hide the keys below.
    return {
        "af2_version": af2_version,
        "description": description,
        "msa_description": msa_description,
        "mdl_description": mdl_description,
    }
def _get_protocol_steps_and_software_alphafold(config_data):
    """Get protocol steps for AF2 based models.

    Two steps are produced, in this order: MSA generation, then modelling.

    Args:
        config_data: Mapping that must provide ``"af2_version"``,
            ``"msa_description"`` and ``"mdl_description"``.

    Returns:
        list: One dict per protocol step with the keys ``"method_type"``,
        ``"name"``, ``"details"``, ``"input"``, ``"output"``, ``"software"``
        and ``"software_parameters"``.

    Raises:
        KeyError: If ``config_data`` lacks one of the required keys.
    """
    # "input"/"output" hold keyword labels, not data objects: they must refer
    # to data that already exists and are resolved to it later.
    af2_version = config_data["af2_version"]
    protocol = []

    # MSA generation
    protocol.append(
        {
            "method_type": "coevolution MSA",
            "name": None,
            "details": config_data["msa_description"],
            "input": ["target_sequences", "alphafold_reference_dbs"],
            "output": ["msas"],
            "software": [_get_af2_software(af2_version)],
            "software_parameters": None,
        }
    )

    # modelling step
    protocol.append(
        {
            "method_type": "modeling",
            "name": None,
            "details": config_data["mdl_description"],
            "input": ["target_sequences"],
            "output": ["model"],
            # Same configured version as the MSA step, not a second literal.
            "software": [_get_af2_software(af2_version)],
            "software_parameters": None,
        }
    )

    return protocol
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment