diff --git a/assembly/README.md b/assembly/README.md
index e5a56cc070f00d9b491176b421958cdff23a9831..8c339e5721c86fa4741a2b0c236bea8d961d7190 100755
--- a/assembly/README.md
+++ b/assembly/README.md
@@ -13,6 +13,13 @@ The user needs to provide two things to run the workflow on her samples:
 - a config file with some global options for the analysis
 - a tab separate table, without header, that contains the sample names and the corresponding paths to the HiFi consensus reads.
 
+## Create conda environment containing snakemake and singularity
+```
+conda env create -f environment.yml
+
+```
+
+
 ## config.yml
 
 In the file config/config.yaml some global parameters can be set:
diff --git a/assembly/config/cluster_config.yaml b/assembly/config/cluster_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a8eb21698259a77bfd2a075f95285bb9c955f5ad
--- /dev/null
+++ b/assembly/config/cluster_config.yaml
@@ -0,0 +1,29 @@
+cluster:
+  mkdir -p logs/{rule} &&
+  sbatch
+    --partition={resources.partition}
+    --qos={resources.qos}
+    --cpus-per-task={threads}
+    --mem={resources.mem_mb}
+    --job-name=assemblySMK-{rule}-{wildcards}
+    --output=logs/{rule}/{rule}-{wildcards}-%j.stdout
+    --error=logs/{rule}/{rule}-{wildcards}-%j.stderr
+default-resources:
+  - partition=scicore
+  - qos=1day
+  - time=12:00:00
+  - mem_mb=20000
+restart-times: 3
+max-jobs-per-second: 10
+max-status-checks-per-second: 1
+local-cores: 1
+latency-wait: 60
+jobs: 500
+keep-going: True
+rerun-incomplete: True
+printshellcmds: True
+scheduler: greedy
+use-conda: True
+
+
+# https://github.com/jdblischak/smk-simple-slurm
diff --git a/assembly/config/config.yaml b/assembly/config/config.yaml
index 171ee8f746989a4e8f72d3b20b4e37709e23748f..9af1c0c7665a69956f1adacc03fa354521e44d4d 100755
--- a/assembly/config/config.yaml
+++ b/assembly/config/config.yaml
@@ -4,14 +4,13 @@
 
 samples: config/samples.tsv
 outdir: ./results
 
-output_prefix: pb_bernese
 
 ref:
   genome_size: 4.4m
   gbf: resources/H37Rv.gbf
-bakta_db: resources/bakta_db
-container: container/assemblySC.sif
+bakta_db: /scicore/home/gagneux/GROUP/PacbioSnake_resources/databases/bakta_db
+container: /scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/assemblySC.sif
 
 threads_per_job: 4
diff --git a/assembly/config/environment.yml b/assembly/config/environment.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f5e4dfb68a298668efd08fbd932411e9cf7f2d77
--- /dev/null
+++ b/assembly/config/environment.yml
@@ -0,0 +1,9 @@
+name: assemblySMK
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+  - r
+dependencies:
+  - snakemake=7.32.4
+  - singularity=3.8.6
diff --git a/assembly/run_assembly_pipeline.py b/assembly/run_assembly_pipeline.py
old mode 100644
new mode 100755
index 0c4985ce6a8ee28c8c870553f88397e4cdecbd42..388c61689988a8ac2eeeb7f139005c6a9f74435e
--- a/assembly/run_assembly_pipeline.py
+++ b/assembly/run_assembly_pipeline.py
@@ -1,66 +1,70 @@
+#!/usr/bin/env python3
 import argparse
 import os
 import yaml
+import sys
 
 
 def get_args():
-    parser = argparse.ArgumentParser(
-        description='')
-
-    parser.add_argument(
-        '-c', '--configfile',
-        dest='config',
-        required=True,
-        help='.'
-    )
-
-    parser.add_argument(
-        '-j', '--njobs',
-        dest="win_size",
-        required=True, type=int,
-        help='Window size.'
-    )
-
-    parser.add_argument(
-        '-t',
-        dest='threads',
-        type=int, default=0,
-        help='Threads per job.'
-    )
+    parser = argparse.ArgumentParser(description='Run PacBio HiFi assembly pipeline on sciCORE')
+
+    # Parameter groups
+    parser_io = parser.add_argument_group('INPUT/OUTPUT')
+
+    parser_cluster = parser.add_argument_group('CLUSTER CONFIGURATION')
+
+    # INPUT/OUTPUT
+    parser_io.add_argument('-s', '--samples', required=True, help='Path to tab-separated table, no header, with sample name and path to fastq with HiFi reads.')
+
+    parser_io.add_argument('-o', '--outdir', required=True, help='Output directory for the results.')
+
+
+    # CLUSTER CONFIG
+    parser_cluster.add_argument('-j', '--njobs', default='4', help='Number of jobs to run in parallel. [4]')
+
+    parser_cluster.add_argument('-t', '--threads', default='10', help='Threads per job. [10]' )
 
     args=parser.parse_args()
+
     return args
 
 
 def main():
 
     args = get_args()
-
-    with open(args.config, 'r') as file:
-        config = yaml.safe_load(file)
-
-
+    # Infer pipeline location from path of run_assembly_pipeline.py
-
-
+    pl_path = os.path.dirname(os.path.abspath(sys.argv[0]))
+    print(pl_path)
+    # Directories for which singularity needs to be given access
+    bind_dirs = [
+        "/scicore/home/gagneux/GROUP/tbresearch/genomes/IN_PROGRESS/PacBio_genomes/Gagneux",
+        "/scratch",
+        "/scicore/home/gagneux/GROUP/PacbioSnake_resources",
+        args.outdir,
+        pl_path
+    ]
+
+    singularity_args = "--bind " + " --bind ".join(bind_dirs)
 
     cmd = [
         "snakemake",
-        "--profile", "",
-        "--snakefile", "/scicore/home/gagneux/GROUP/PacbioSnake/assembly/workflow/Snakefile",
-        "--directory", "/scicore/home/gagneux/GROUP/PacbioSnake/assembly",
-        "--configfile", "/scicore/home/gagneux/stritt0001/TB/projects/pacbio_microscale/results/demo/config.yml",
-        "--jobs", "4",
-        "--latency-wait", "60",
+        "--snakefile", pl_path + "/workflow/Snakefile",
+        "--directory", pl_path,
+        "--configfile", pl_path + "/config/config.yaml",
+        "--profile", pl_path + "/config/cluster_config.yaml",
+        # Overwrite samples and outdir parameters
+        "--config", "samples=" + args.samples,
+        "--config", "outdir=" + args.outdir,
+        "--jobs", args.njobs,
         "--cleanup-shadow",
-        "--shadow-prefix",
-        "--verbose",
-        "--use-singularity", "--singularity-args", "--bind /scicore/home/gagneux/GROUP/tbresearch/genomes/IN_PROGRESS/PacBio_genomes/Gagneux --bind /scicore/home/gagneux/stritt0001 --bind /scratch",
-        "--cluster", "sbatch --job-name=pbassembly --cpus-per-task=4 --mem-per-cpu=4G --time=06:00:00 --qos=6hours --output=/scicore/home/gagneux/stritt0001/TB/projects/pacbio_microscale/results/demo/pbassembly.o%j --error=/scicore/home/gagneux/stritt0001/TB/projects/pacbio_microscale/results/demo/pbassembly.e%j"
+        "--use-singularity",
+        "--singularity-args"
+        " \"" + singularity_args + "\""
     ]
+    #print(" ".join(cmd))
 
     os.system(" ".join(cmd))
 
 
 if __name__ == '__main__':