Skip to content
Snippets Groups Projects
Commit 0d6ddf81 authored by Christoph Stritt's avatar Christoph Stritt
Browse files

Merge branch 'scicore' of https://git.scicore.unibas.ch/TBRU/PacbioSnake into scicore

parents 83c8980a 752f06bb
No related branches found
No related tags found
No related merge requests found
......@@ -7,3 +7,5 @@ assembly/resources/bakta_db
facienda.md
variantcalling/container/pggb_latest.sif
variantcalling/.snakemake
assembly/logs
assembly/.fontconfig
......@@ -13,14 +13,18 @@ The user needs to provide two things to run the workflow on her samples:
- a config file with some global options for the analysis
- a tab-separated table, without a header, that contains the sample names and the corresponding paths to the HiFi consensus reads.
## Clone the directory
## Create conda environment containing snakemake and singularity
```
conda env create -f environment.yml
conda env create -f config/environment.yml
```
## config.yml
## Run the pipeline
In the file config/config.yaml some global parameters can be set:
```yaml
......
......@@ -9,16 +9,16 @@ cluster:
--output=logs/{rule}/{rule}-{wildcards}-%j.stdout
--error=logs/{rule}/{rule}-{wildcards}-%j.stderr
default-resources:
- partition=scicore
- qos=1day
- time=12:00:00
- mem_mb=20000
- "partition=scicore"
- "qos='1day'"
- "time='12:00:00'"
- "mem_mb=20000"
restart-times: 3
max-jobs-per-second: 10
max-status-checks-per-second: 1
local-cores: 1
latency-wait: 60
jobs: 500
jobs: 10
keep-going: True
rerun-incomplete: True
printshellcmds: True
......
......@@ -2,15 +2,17 @@
#
##############################
samples: config/samples.tsv
outdir: ./results
samples: "config/samples.tsv"
outdir: "./results"
ref:
genome_size: 4.4m
gbf: resources/H37Rv.gbf
genome_size: "4.4m"
gbf: "resources/H37Rv.gbf"
bakta_db: /scicore/home/gagneux/GROUP/PacbioSnake_resources/databases/bakta_db
container: /scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/assemblySC.sif
bakta_db: "/scicore/home/gagneux/GROUP/PacbioSnake_resources/databases/bakta_db"
container: "/scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/assemblySC.sif"
annotate: "No"
threads_per_job: 4
......
......@@ -7,3 +7,4 @@ channels:
dependencies:
- snakemake=7.32.4
- singularity=3.8.6
- biopython
......@@ -2,7 +2,6 @@
import argparse
import os
import yaml
import sys
def get_args():
......@@ -12,15 +11,17 @@ def get_args():
# Parameter groups
parser_io = parser.add_argument_group('INPUT/OUTPUT')
parser_cluster = parser.add_argument_group('CLUSTER CONFIGURATION')
parser_cluster = parser.add_argument_group('CLUSTER CONFIGURATION (not implemented yet)')
# INPUT/OUTPUT
parser_io.add_argument('-s', '--samples', required=True, help='Path to tab-separeted table, no header, with sample name and path to fastq with HiFi reads.')
parser_io.add_argument('-s', '--samples', required=True, help='Absolute path to tab-separated table, no header, with sample name and path to fastq with HiFi reads.')
parser_io.add_argument('-o', '--outdir', required=True, help='Output directory for the results.')
parser_io.add_argument('-o', '--outdir', required=True, help='Absolute path to output directory.')
parser_io.add_argument('-n', '--dry_run', action='store_true', help='Do snakemake dry run.')
# CLUSTER CONFIG
# CLUSTER CONFIG (not implemented, would have to tamper with the cluster config file)
parser_cluster.add_argument('-j', '--njobs', default='4', help='Number of jobs to run in parallel. [4]')
parser_cluster.add_argument('-t', '--threads', default='10', help='Threads per job. [10]' )
......@@ -36,8 +37,7 @@ def main():
# Infer pipeline location from path of run_assembly_pipeline.py
pl_path = os.path.dirname(os.path.abspath(sys.argv[0]))
print(pl_path)
# Directories for which singularity needs to be given access
bind_dirs = [
"/scicore/home/gagneux/GROUP/tbresearch/genomes/IN_PROGRESS/PacBio_genomes/Gagneux",
......@@ -47,24 +47,43 @@ def main():
pl_path
]
# Infer folders with samples, to add them to bind_dirs
sample_dirs = set()
with open(args.samples) as f:
for line in f:
fields = line.strip().split()
fastq_path = fields[1]
fastq_dir = os.path.dirname(os.path.realpath(fastq_path))
sample_dirs.add(fastq_dir)
bind_dirs = bind_dirs + list(sample_dirs)
singularity_args = "--bind " + " --bind ".join(bind_dirs)
cmd = [
"snakemake",
"--snakefile", pl_path + "/workflow/Snakefile",
"--directory", pl_path,
"--configfile", pl_path + "/config/config.yaml",
"--profile", pl_path + "/config/cluster_config.yaml",
# Overwrite samples and outdir parameters
"--config", "samples=" + args.samples,
"--config", "outdir=" + args.outdir,
"--jobs", args.njobs,
"--cleanup-shadow",
"--use-singularity",
"--singularity-args" + " \"" + singularity_args + "\""
]
#print(" ".join(cmd))
if args.dry_run:
cmd = [
"snakemake -n",
"--snakefile", pl_path + "/workflow/Snakefile",
"--directory", pl_path,
"--configfile", pl_path + "/config/config.yaml",
"--config", "samples=\"" + args.samples + "\"" + " outdir=\"" + args.outdir + "\""
]
else:
cmd = [
"snakemake",
"--snakefile", pl_path + "/workflow/Snakefile",
"--directory", pl_path,
"--configfile", pl_path + "/config/config.yaml",
"--profile", pl_path + "/cluster",
"--use-singularity",
"--singularity-args" + " \"" + singularity_args + "\"",
# Overwrite samples and outdir parameters in configfile
"--config", "samples=\"" + args.samples + "\"" + " outdir=\"" + args.outdir + "\""
]
print("\n" + " ".join(cmd) + "\n")
os.system(" ".join(cmd))
if __name__ == '__main__':
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment