Skip to content
Snippets Groups Projects
Snakefile 6.94 KiB
Newer Older
Terezia Dorcakova's avatar
Terezia Dorcakova committed

import itertools, pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns, openbabel, openbabel.pybel, scipy, scipy.stats, openbabel.pybel

from modules import *

rule smiles_dude:
    input:
        smi_actives = 'resources/DHODH_DUD-E/pyrd/actives_final.ism',
        smi_decoys = 'resources/DHODH_DUD-E/pyrd/decoys_final.ism',
    output:
        smi_actives = 'results/smiles/dude_actives.smi',
        smi_decoys = 'results/smiles/dude_decoys.smi',
    params:
        random_source = 'resources/DHODH_DUD-E/pyrd.tar.gz',
    shell: """
        cp {input.smi_actives} {output.smi_actives}
        shuf -n 645 --random-source={params.random_source} {input.smi_decoys} -o {output.smi_decoys}
    """

localrules: rdconf

rule rdconf:
    input:
        smi = 'results/smiles/{compounds_id}.smi',
    output:
        sdf = 'results/rdconf/{compounds_id}.sdf',
    shell: """
        python software/rdkit-scripts/rdconf.py --maxconfs 1 {input.smi} {output.sdf}
    """

# https://doi.org/10.1038/s41598-021-91069-7
# https://onlinelibrary.wiley.com/doi/full/10.1002/jhet.3644
# https://www.nature.com/articles/s41598-022-23006-1
# https://www.sciencedirect.com/science/article/pii/S0022286020310176
# https://www.mdpi.com/1420-3049/27/12/3660
pdb_lig = {
    '1d3g': 'BRE',
    '1d3h': 'A26',
    '4igh': '1EA',
    '6gk0': 'F1W',
    '6j3c': 'B5X',
    '5zf4': '9BL',
    '3g0u': 'MDY',
    '4oqv': '2V6',
    '3u2o': '03U',
    '6oc0': 'M4J',
}

localrules: pdb

rule pdb:
    output:
        'results/pdb/{struct_id}.pdb',
        temp('results/pdb/{struct_id}_fetch.pdb'),
        temp('results/pdb/{struct_id}_pdbfixer.pdb'),
        temp('results/pdb/{struct_id}_pdbfixer_aligned.pdb'),
    params:
        delres = 'SO4,GOL,CL,ACY,ACT,LDA,ZWI,DDQ,NA,DOR',
        lig = lambda wc: pdb_lig[wc.struct_id],
        ref = 'resources/alphafill/AF-Q02127-F1-model_v2-FMN.pdb'
    shell: """
        pdb_fetch -biounit {wildcards.struct_id} | pdb_delresname -HOH > results/pdb/{wildcards.struct_id}_fetch.pdb
        pdbfixer results/pdb/{wildcards.struct_id}_fetch.pdb --add-residues --add-atoms=heavy --output=results/pdb/{wildcards.struct_id}_pdbfixer.pdb
        cd results/pdb
        prody align ../../{params.ref} {wildcards.struct_id}_pdbfixer.pdb
        cd ../..
        pdb_delresname -{params.delres},{params.lig},ORO results/pdb/{wildcards.struct_id}_pdbfixer_aligned.pdb > results/pdb/{wildcards.struct_id}.pdb
        pdb_selresname -{params.lig} results/pdb/{wildcards.struct_id}_pdbfixer_aligned.pdb > results/pdb/{wildcards.struct_id}_lig.pdb
    """

rule reduce:
    input:
        pdb = 'results/pdb/{struct_id}.pdb',
    output:
        pdb = 'results/pdb.reduce/{struct_id}.pdb',
        fix = 'results/pdb.reduce/{struct_id}.fix.pdb',
        pdbqt = 'results/pdb.reduce/{struct_id}.pdbqt',
    shell: """
        #export REDUCE_HET_DICT=/cluster/project/beltrao/jjaenes/software/miniconda3/envs/adfr-suite/bin/reduce_wwPDB_het_dict.txt
        #conda run -p /cluster/project/beltrao/jjaenes/software/miniconda3/envs/adfr-suite reduce {input.pdb} > {output.pdb}
        export REDUCE_HET_DICT=software/reduce/reduce_wwPDB_het_dict.txt
        software/reduce/reduce_src/reduce -FLIP {input.pdb} > {output.pdb} || true
        pdbfixer {output.pdb} --output={output.fix}
        obabel {output.fix} -xr --partialcharge gasteiger -O{output.pdbqt}
    """

rule smina:
    input:
        sdf = 'results/rdconf/{compounds_id}.sdf',
        pdb = 'results/pdb/{struct_id}.pdb',
    output:
        sdf = 'results/pdb.smina/{struct_id}+{compounds_id}.sdf',
    threads: 64
    shell: """
        software/bin/smina --cpu {threads}\
            --ligand {input.sdf}\
            --receptor {input.pdb}\
            --autobox_ligand {input.pdb}\
            --out {output.sdf}\
            --exhaustiveness 64\
            --seed 4
    """

rule gnina:
    input:
        sdf = 'results/rdconf/{compounds_id}.sdf',
        pdb = 'results/pdb/{struct_id}.pdb',
    output:
        sdf = 'results/pdb.gnina/{struct_id}+{compounds_id}.sdf',
    threads: 64
    shell: """
        module load gcc/9.3.0; software/bin/gnina --cpu {threads}\
            --ligand {input.sdf}\
            --receptor {input.pdb}\
            --autobox_ligand {input.pdb}\
            --out {output.sdf}\
            --exhaustiveness 64\
            --seed 4 --no_gpu --cnn crossdock_default2018
    """

localrules: lig_decoys_plot

rule lig_decoys_plot:
    input:
        sdf = [ 'results/pdb.smina/{struct_id}+%s.sdf' % (compounds_id,) for compounds_id in read_dude()['compounds_id'] ],
    output:
        png = 'results/pdb.smina.plot_lig_decoys/{struct_id}.png',
        tsv = 'results/pdb.smina.plot_lig_decoys/{struct_id}.tsv',
    run:
        df_ = read_dude()
        df_['sdf_out'] = list(input.sdf)

        df_scores_ = pd.concat([ read_affinities_dude(r.sdf_out, r.compounds_id) for i, r in df_.iterrows() ], axis=0)
        fig, ax = plt.subplots(1, 2, sharey=True, figsize=(10,8))
        order_ = df_scores_.query('~is_decoy').sort_values('decoyNormalisedAffinity', ascending=False)['label'].to_list()
        sns.violinplot(data=df_scores_, x='minimizedAffinity', y='label', hue='is_decoy', ax=ax[0], split=True, cut=0, inner='stick', order=order_)#, color='tab:blue')
        sns.barplot(data=df_scores_.query('~is_decoy'), x='decoyNormalisedAffinity', y='label', ax=ax[1], color='tab:blue', order=order_)
        #ax[1].axvline(1.25, color='tab:red')
        fig.savefig(output.png, bbox_inches='tight')
        df_scores_.to_csv(output.tsv, sep='\t', header=True, index=False)

rule all:
    """
        profile_euler/run_local --dry-run
        profile_euler/run_sbatch --dry-run
    """
    default_target: True
    input:
        #[f'results/pdb.smina/{struct_id}+dude_actives.sdf' for struct_id in pdb_lig.keys() ],
        #[f'results/pdb.smina/{struct_id}+dude_decoys.sdf' for struct_id in pdb_lig.keys() ],
        #[f'results/pdb.smina/{struct_id}+DHODH_ChEMBL.sdf'  for struct_id in pdb_lig.keys() ],
        #[f'results/pdb.smina/{struct_id}+DHODH_inhibitors.sdf'  for struct_id in pdb_lig.keys() ],
        #[f'results/pdb.smina/{struct_id}+DHODH_inhibitors_pubchem.sdf'  for struct_id in pdb_lig.keys() ],
        #[f'results/pdb.gnina/{struct_id}+dude_actives.sdf' for struct_id in pdb_lig.keys() ],
        #[f'results/pdb.gnina/{struct_id}+dude_decoys.sdf' for struct_id in pdb_lig.keys() ],
        #[f'results/pdb.gnina/{struct_id}+DHODH_ChEMBL.sdf'  for struct_id in pdb_lig.keys() ],
        #[f'results/pdb.gnina/{struct_id}+DHODH_inhibitors.sdf' for struct_id in pdb_lig.keys() ],
        #[f'results/pdb.gnina/{struct_id}+DHODH_inhibitors_pubchem.sdf' for struct_id in pdb_lig.keys() ],
        #expand('results/pdb.smina/{struct_id}+{compounds_id}.sdf', struct_id=pdb_lig.keys(), compounds_id=read_dude()['compounds_id']),
        #expand('results/pdb.gnina/{struct_id}+{compounds_id}.sdf', struct_id=pdb_lig.keys(), compounds_id=read_dude()['compounds_id']),
        expand('results/pdb.smina.plot_lig_decoys/{struct_id}.png', struct_id=pdb_lig.keys())