Skip to content
Snippets Groups Projects
Commit 2d44d68a authored by Christoph Stritt's avatar Christoph Stritt
Browse files

Wrapper for execution on sciCORE

parent 448f13b6
No related branches found
No related tags found
No related merge requests found
......@@ -13,6 +13,13 @@ The user needs to provide two things to run the workflow on her samples:
- a config file with some global options for the analysis
- a tab-separated table, without header, that contains the sample names and the corresponding paths to the HiFi consensus reads.
## Create conda environment containing snakemake and singularity
```
conda env create -f environment.yml
```
## config.yml
In the file config/config.yaml some global parameters can be set:
......
cluster:
mkdir -p logs/{rule} &&
sbatch
--partition={resources.partition}
--qos={resources.qos}
--cpus-per-task={threads}
--mem={resources.mem_mb}
--job-name=assemblySMK-{rule}-{wildcards}
--output=logs/{rule}/{rule}-{wildcards}-%j.stdout
--error=logs/{rule}/{rule}-{wildcards}-%j.stderr
default-resources:
- partition=scicore
- qos=1day
- time=12:00:00
- mem_mb=20000
restart-times: 3
max-jobs-per-second: 10
max-status-checks-per-second: 1
local-cores: 1
latency-wait: 60
jobs: 500
keep-going: True
rerun-incomplete: True
printshellcmds: True
scheduler: greedy
use-conda: True
# https://github.com/jdblischak/smk-simple-slurm
......@@ -4,14 +4,13 @@
samples: config/samples.tsv
outdir: ./results
output_prefix: pb_bernese
ref:
genome_size: 4.4m
gbf: resources/H37Rv.gbf
bakta_db: resources/bakta_db
container: container/assemblySC.sif
bakta_db: /scicore/home/gagneux/GROUP/PacbioSnake_resources/databases/bakta_db
container: /scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/assemblySC.sif
threads_per_job: 4
......
# Conda environment providing snakemake and singularity to drive the workflow
# (create with: conda env create -f environment.yml)
name: assemblySMK
channels:
- conda-forge
- bioconda
- defaults
- r
dependencies:
# Workflow engine; pinned to the version the cluster profile was written for
- snakemake=7.32.4
# Container runtime used via snakemake's --use-singularity
- singularity=3.8.6
#!/usr/bin/env python3
import argparse
import os
import subprocess
import sys

import yaml
def get_args():
    """Parse command-line arguments for the sciCORE assembly wrapper.

    Returns:
        argparse.Namespace with attributes ``samples``, ``outdir``,
        ``njobs`` and ``threads`` (the last two kept as strings, as they
        are passed straight through to the snakemake command line).
    """
    # NOTE: an earlier, discarded parser (-c/--configfile, a -j option with a
    # copy-pasted dest="win_size") was dead code left over from a merge and
    # has been removed; only the parser actually returned is kept.
    parser = argparse.ArgumentParser(
        description='Run PacBio HiFi assembly pipeline on sciCORE')

    # Parameter groups, purely for nicer --help output
    parser_io = parser.add_argument_group('INPUT/OUTPUT')
    parser_cluster = parser.add_argument_group('CLUSTER CONFIGURATION')

    # INPUT/OUTPUT
    parser_io.add_argument(
        '-s', '--samples', required=True,
        help='Path to tab-separated table, no header, with sample name '
             'and path to fastq with HiFi reads.')
    parser_io.add_argument(
        '-o', '--outdir', required=True,
        help='Output directory for the results.')

    # CLUSTER CONFIG
    parser_cluster.add_argument(
        '-j', '--njobs', default='4',
        help='Number of jobs to run in parallel. [4]')
    parser_cluster.add_argument(
        '-t', '--threads', default='10',
        help='Threads per job. [10]')

    return parser.parse_args()
def main():
    """Build and launch the snakemake invocation for the sciCORE cluster.

    Locates the pipeline relative to this script, binds the directories
    singularity needs, and runs snakemake with the user's samples table
    and output directory overriding the shipped config.
    """
    args = get_args()

    # The merged-diff remnants that opened args.config (an attribute the
    # parser never defines) and loaded an unused YAML dict were removed:
    # the config file used is the one shipped next to the workflow below.

    # Infer pipeline location from the path of run_assembly_pipeline.py,
    # so workflow, config and cluster profile are resolved relative to it.
    pl_path = os.path.dirname(os.path.abspath(sys.argv[0]))

    # Directories for which singularity needs to be given access
    bind_dirs = [
        "/scicore/home/gagneux/GROUP/tbresearch/genomes/IN_PROGRESS/PacBio_genomes/Gagneux",
        "/scratch",
        "/scicore/home/gagneux/GROUP/PacbioSnake_resources",
        args.outdir,
        pl_path,
    ]
    singularity_args = "--bind " + " --bind ".join(bind_dirs)

    cmd = [
        "snakemake",
        "--snakefile", os.path.join(pl_path, "workflow", "Snakefile"),
        "--directory", pl_path,
        "--configfile", os.path.join(pl_path, "config", "config.yaml"),
        "--profile", os.path.join(pl_path, "config", "cluster_config.yaml"),
        # Overwrite samples and outdir parameters from the command line
        "--config", "samples=" + args.samples,
        "--config", "outdir=" + args.outdir,
        "--jobs", str(args.njobs),
        "--use-singularity",
        # One argv element each: flag, then the whole bind string as its value
        "--singularity-args", singularity_args,
    ]

    # subprocess.run with a list (no shell) avoids injection through
    # user-supplied paths, unlike the previous os.system(" ".join(cmd));
    # check=True surfaces a non-zero snakemake exit as an exception.
    subprocess.run(cmd, check=True)
if __name__ == '__main__':
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment