Skip to content
Snippets Groups Projects
Commit 02038207 authored by Bienchen's avatar Bienchen
Browse files

Add missing software

parent 8d0378b2
No related branches found
No related tags found
No related merge requests found
...@@ -2,12 +2,14 @@ Biopython ...@@ -2,12 +2,14 @@ Biopython
CIF CIF
DBs DBs
FastA FastA
HH-suite
Jupyter Jupyter
MSA MSA
ModelCIF ModelCIF
PAE PAE
PDB PDB
PPI PPI
Prefilled
coevolution coevolution
modeled modeled
modeling modeling
......
...@@ -31,6 +31,7 @@ import modelcif.protocol ...@@ -31,6 +31,7 @@ import modelcif.protocol
from alphapulldown.utils import make_dir_monomer_dictionary from alphapulldown.utils import make_dir_monomer_dictionary
# ToDo: Software versions must not contain whitespace, e.g. ColabFold (drop time)
# ToDo: DISCUSS Get options properly, best get the same names as used in # ToDo: DISCUSS Get options properly, best get the same names as used in
# existing scripts # existing scripts
# ToDo: Monomers work separately - features may come from different set of # ToDo: Monomers work separately - features may come from different set of
...@@ -738,6 +739,64 @@ def _get_software_data(meta_json: dict) -> list: ...@@ -738,6 +739,64 @@ def _get_software_data(meta_json: dict) -> list:
], ],
doi="10.1186/s12859-019-3019-7", doi="10.1186/s12859-019-3019-7",
) )
class _HHsuiteSW(modelcif.Software):
    """Software entry pre-populated with the values shared by HH-suite tools.

    Only the tool name has to be given; every other field falls back to the
    HH-suite default declared in the constructor signature and may be
    overridden per tool (e.g. a tool-specific description).
    """

    # The parameter names mirror the parent class on purpose, so 'type'
    # shadows the builtin here; tell Pylint not to complain about it.
    # pylint: disable=redefined-builtin
    def __init__(
        self,
        name,
        classification="data collection",
        description=(
            "Iterative protein sequence searching by HMM-HMM alignment"
        ),
        location="https://github.com/soedinglab/hh-suite",
        type="program",
        version=None,
        citation=cite_hhsuite,
    ):
        """Set up the software object, forwarding all fields to the parent."""
        super().__init__(
            name,
            classification,
            description,
            location,
            type=type,
            version=version,
            citation=citation,
        )
class _HmmerSW(modelcif.Software):
    """Software entry pre-populated with the values shared by HMMER tools.

    Only the tool name has to be given; every other field falls back to the
    HMMER default declared in the constructor signature and may be
    overridden per tool (e.g. a tool-specific description).
    """

    # The parameter names mirror the parent class on purpose, so 'type'
    # shadows the builtin here; tell Pylint not to complain about it.
    # pylint: disable=redefined-builtin
    def __init__(
        self,
        name,
        classification="data collection",
        description="Building HMM search profiles",
        location="http://hmmer.org/",
        type="program",
        version=None,
        citation=None,
    ):
        """Set up the software object, forwarding all fields to the parent."""
        super().__init__(
            name,
            classification,
            description,
            location,
            type=type,
            version=version,
            citation=citation,
        )
# {key from JSON: dict needed to produce software entry plus internal key} # {key from JSON: dict needed to produce software entry plus internal key}
sw_data = { sw_data = {
"AlphaFold": modelcif.Software( "AlphaFold": modelcif.Software(
...@@ -805,36 +864,41 @@ def _get_software_data(meta_json: dict) -> list: ...@@ -805,36 +864,41 @@ def _get_software_data(meta_json: dict) -> list:
doi="10.1093/bioinformatics/btac749", doi="10.1093/bioinformatics/btac749",
), ),
), ),
"hhblits": modelcif.Software( "hhblits": _HHsuiteSW("HHblits"),
"HHblits", "hhsearch": _HHsuiteSW(
"data collection",
"Iterative protein sequence searching by HMM-HMM alignment",
"https://github.com/soedinglab/hh-suite",
"program",
None,
cite_hhsuite,
),
"hhsearch": modelcif.Software(
"HHsearch", "HHsearch",
"data collection", description="Protein sequence searching by HMM-HMM comparison",
"Protein sequence searching by HMM-HMM comparison", ),
"https://github.com/soedinglab/hh-suite", "hmmbuild": _HmmerSW("hmmbuild"),
"program", "hmmsearch": _HmmerSW(
None, "hmmsearch",
cite_hhsuite, description="Search profile(s) against a sequence database",
), ),
"hmmbuild": modelcif.Software( "jackhmmer": _HmmerSW(
"hmmbuild", "jackhmmer",
description="Iteratively search sequence(s) against a sequence "
+ "database",
),
"kalign": modelcif.Software(
"kalign",
"data collection", "data collection",
"Building HMM search profiles", "Kalign is a fast multiple sequence alignment program for "
"http://hmmer.org/", + "biological sequences",
"https://github.com/timolassmann/kalign",
"program", "program",
None, None,
None, ihm.Citation(
pmid="31665271",
title="Kalign 3: multiple sequence alignment of large data "
+ "sets",
journal="Bioinformatics",
volume=36,
page_range=(1928, 1929),
year=2019,
authors=["Lassmann, T."],
doi="10.1093/bioinformatics/btz795",
),
), ),
"hmmsearch": None,
"jackhmmer": None,
"kalign": None,
} }
# ToDo: refactor to only those SW objects created/ added that are actually # ToDo: refactor to only those SW objects created/ added that are actually
# in the dictionary. That is, instead of a pre-build dictionary, # in the dictionary. That is, instead of a pre-build dictionary,
...@@ -862,6 +926,9 @@ def _get_software_data(meta_json: dict) -> list: ...@@ -862,6 +926,9 @@ def _get_software_data(meta_json: dict) -> list:
def _get_protocol_steps(modelcif_json): def _get_protocol_steps(modelcif_json):
"""Create the list of protocol steps with software and parameters used.""" """Create the list of protocol steps with software and parameters used."""
# ToDo: Get software_group from external input. Right now, the protocol
# steps are hard-coded here with the software per step; the JSON input
# does not list steps, only software.
protocol = [] protocol = []
# MSA/ monomer feature generation step # MSA/ monomer feature generation step
# ToDo: Discuss input, manual has baits & sequences # ToDo: Discuss input, manual has baits & sequences
...@@ -884,7 +951,6 @@ def _get_protocol_steps(modelcif_json): ...@@ -884,7 +951,6 @@ def _get_protocol_steps(modelcif_json):
# ToDo: Discuss input, seem to depend on mode # ToDo: Discuss input, seem to depend on mode
# ToDo: what about step details? Would it be nice to add the AlphaPulldown # ToDo: what about step details? Would it be nice to add the AlphaPulldown
# mode here? # mode here?
# ToDo: get software_group from external input
step = { step = {
"method_type": "modeling", "method_type": "modeling",
"step_name": None, "step_name": None,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment