Skip to content
Snippets Groups Projects
Commit 752f06bb authored by Christoph Stritt's avatar Christoph Stritt
Browse files

First working version

parent 2d44d68a
No related branches found
No related tags found
No related merge requests found
......@@ -7,3 +7,5 @@ assembly/resources/bakta_db
facienda.md
variantcalling/container/pggb_latest.sif
variantcalling/.snakemake
assembly/logs
assembly/.fontconfig
......@@ -13,14 +13,18 @@ The user needs to provide two things to run the workflow on her samples:
- a config file with some global options for the analysis
- a tab-separated table, without header, that contains the sample names and the corresponding paths to the HiFi consensus reads.
## Clone the directory
## Create conda environment containing snakemake and singularity
```
conda env create -f environment.yml
conda env create -f config/environment.yml
```
## config.yml
## Run the pipeline
In the file config/config.yaml some global parameters can be set:
```yaml
......
......@@ -9,16 +9,16 @@ cluster:
--output=logs/{rule}/{rule}-{wildcards}-%j.stdout
--error=logs/{rule}/{rule}-{wildcards}-%j.stderr
default-resources:
- partition=scicore
- qos=1day
- time=12:00:00
- mem_mb=20000
- "partition=scicore"
- "qos='1day'"
- "time='12:00:00'"
- "mem_mb=20000"
restart-times: 3
max-jobs-per-second: 10
max-status-checks-per-second: 1
local-cores: 1
latency-wait: 60
jobs: 500
jobs: 10
keep-going: True
rerun-incomplete: True
printshellcmds: True
......
......@@ -2,15 +2,17 @@
#
##############################
samples: config/samples.tsv
outdir: ./results
samples: "config/samples.tsv"
outdir: "./results"
ref:
genome_size: 4.4m
gbf: resources/H37Rv.gbf
genome_size: "4.4m"
gbf: "resources/H37Rv.gbf"
bakta_db: /scicore/home/gagneux/GROUP/PacbioSnake_resources/databases/bakta_db
container: /scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/assemblySC.sif
bakta_db: "/scicore/home/gagneux/GROUP/PacbioSnake_resources/databases/bakta_db"
container: "/scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/assemblySC.sif"
annotate: "No"
threads_per_job: 4
......
......@@ -7,3 +7,4 @@ channels:
dependencies:
- snakemake=7.32.4
- singularity=3.8.6
- biopython
......@@ -2,7 +2,6 @@
import argparse
import os
import yaml
import sys
def get_args():
......@@ -12,15 +11,17 @@ def get_args():
# Parameter groups
parser_io = parser.add_argument_group('INPUT/OUTPUT')
parser_cluster = parser.add_argument_group('CLUSTER CONFIGURATION')
parser_cluster = parser.add_argument_group('CLUSTER CONFIGURATION (not implemented yet)')
# INPUT/OUTPUT
parser_io.add_argument('-s', '--samples', required=True, help='Path to tab-separeted table, no header, with sample name and path to fastq with HiFi reads.')
parser_io.add_argument('-s', '--samples', required=True, help='Absolute path to tab-separated table, no header, with sample name and path to fastq with HiFi reads.')
parser_io.add_argument('-o', '--outdir', required=True, help='Output directory for the results.')
parser_io.add_argument('-o', '--outdir', required=True, help='Absolute path to output directory.')
parser_io.add_argument('-n', '--dry_run', action='store_true', help='Do snakemake dry run.')
# CLUSTER CONFIG
    # CLUSTER CONFIG (not implemented, would have to tamper with the cluster config file)
parser_cluster.add_argument('-j', '--njobs', default='4', help='Number of jobs to run in parallel. [4]')
parser_cluster.add_argument('-t', '--threads', default='10', help='Threads per job. [10]' )
......@@ -36,8 +37,7 @@ def main():
# Infer pipeline location from path of run_assembly_pipeline.py
pl_path = os.path.dirname(os.path.abspath(sys.argv[0]))
print(pl_path)
# Directories for which singularity needs to be given access
bind_dirs = [
"/scicore/home/gagneux/GROUP/tbresearch/genomes/IN_PROGRESS/PacBio_genomes/Gagneux",
......@@ -47,24 +47,43 @@ def main():
pl_path
]
# Infer folders with samples, to add them to bind_dirs
sample_dirs = set()
with open(args.samples) as f:
for line in f:
fields = line.strip().split()
fastq_path = fields[1]
fastq_dir = os.path.dirname(os.path.realpath(fastq_path))
sample_dirs.add(fastq_dir)
bind_dirs = bind_dirs + list(sample_dirs)
singularity_args = "--bind " + " --bind ".join(bind_dirs)
cmd = [
"snakemake",
"--snakefile", pl_path + "/workflow/Snakefile",
"--directory", pl_path,
"--configfile", pl_path + "/config/config.yaml",
"--profile", pl_path + "/config/cluster_config.yaml",
# Overwrite samples and outdir parameters
"--config", "samples=" + args.samples,
"--config", "outdir=" + args.outdir,
"--jobs", args.njobs,
"--cleanup-shadow",
"--use-singularity",
"--singularity-args" + " \"" + singularity_args + "\""
]
#print(" ".join(cmd))
if args.dry_run:
cmd = [
"snakemake -n",
"--snakefile", pl_path + "/workflow/Snakefile",
"--directory", pl_path,
"--configfile", pl_path + "/config/config.yaml",
"--config", "samples=\"" + args.samples + "\"" + " outdir=\"" + args.outdir + "\""
]
else:
cmd = [
"snakemake",
"--snakefile", pl_path + "/workflow/Snakefile",
"--directory", pl_path,
"--configfile", pl_path + "/config/config.yaml",
"--profile", pl_path + "/cluster",
"--use-singularity",
"--singularity-args" + " \"" + singularity_args + "\"",
# Overwrite samples and outdir parameters in configfile
"--config", "samples=\"" + args.samples + "\"" + " outdir=\"" + args.outdir + "\""
]
print("\n" + " ".join(cmd) + "\n")
os.system(" ".join(cmd))
if __name__ == '__main__':
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment