From 2886d21cfd3fc6509cc169191bc6d406b18f708f Mon Sep 17 00:00:00 2001 From: Christoph Stritt <christoph.stritt@unibas.ch> Date: Tue, 16 Jan 2024 10:51:49 +0000 Subject: [PATCH] Revert "Merge branch 'scicore' of https://git.scicore.unibas.ch/TBRU/PacbioSnake into scicore" This reverts commit 0d6ddf818d1c47682c9aba56b0d25c55f5ff1dc2 --- .gitignore | 2 - assembly/README.md | 8 +-- .../cluster_config.yaml} | 10 +-- assembly/config/config.yaml | 14 ++-- assembly/config/environment.yml | 1 - assembly/run_assembly_pipeline.py | 65 +++++++------------ 6 files changed, 36 insertions(+), 64 deletions(-) rename assembly/{cluster/config.yaml => config/cluster_config.yaml} (86%) diff --git a/.gitignore b/.gitignore index 155279f..479190f 100755 --- a/.gitignore +++ b/.gitignore @@ -7,5 +7,3 @@ assembly/resources/bakta_db facienda.md variantcalling/container/pggb_latest.sif variantcalling/.snakemake -assembly/logs -assembly/.fontconfig diff --git a/assembly/README.md b/assembly/README.md index 177b319..8c339e5 100755 --- a/assembly/README.md +++ b/assembly/README.md @@ -13,18 +13,14 @@ The user needs to provide two things to run the workflow on her samples: - a config file with some global options for the analysis - a tab separate table, without header, that contains the sample names and the corresponding paths to the HiFi consensus reads. - -## Clone the directory - - ## Create conda environment containing snakemake and singularity ``` -conda env create -f config/environment.yml +conda env create -f environment.yml ``` -## Run the pipeline +## config.yml In the file config/config.yaml some global parameters can be set: ```yaml diff --git a/assembly/cluster/config.yaml b/assembly/config/cluster_config.yaml similarity index 86% rename from assembly/cluster/config.yaml rename to assembly/config/cluster_config.yaml index 9deb95e..a8eb216 100644 --- a/assembly/cluster/config.yaml +++ b/assembly/config/cluster_config.yaml @@ -9,16 +9,16 @@ cluster: --output=logs/{rule}/{rule}-{wildcards}-%j.stdout --error=logs/{rule}/{rule}-{wildcards}-%j.stderr default-resources: - - "partition=scicore" - - "qos='1day'" - - "time='12:00:00'" - - "mem_mb=20000" + - partition=scicore + - qos=1day + - time=12:00:00 + - mem_mb=20000 restart-times: 3 max-jobs-per-second: 10 max-status-checks-per-second: 1 local-cores: 1 latency-wait: 60 -jobs: 10 +jobs: 500 keep-going: True rerun-incomplete: True printshellcmds: True diff --git a/assembly/config/config.yaml b/assembly/config/config.yaml index ee2e9b6..eb11d3b 100755 --- a/assembly/config/config.yaml +++ b/assembly/config/config.yaml @@ -2,17 +2,15 @@ # ############################## -samples: "config/samples.tsv" -outdir: "./results" +samples: config/samples.tsv +outdir: ./results ref: - genome_size: "4.4m" - gbf: "resources/H37Rv.gbf" + genome_size: 4.4m + gbf: resources/H37Rv.gbf -bakta_db: "/scicore/home/gagneux/GROUP/PacbioSnake_resources/databases/bakta_db" -container: "/scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/assemblySC.sif" - -annotate: "No" +bakta_db: /scicore/home/gagneux/GROUP/PacbioSnake_resources/databases/bakta_db +container: /scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/assemblySC.sif threads_per_job: 4 diff --git a/assembly/config/environment.yml b/assembly/config/environment.yml index 1946e19..f5e4dfb 100644 --- a/assembly/config/environment.yml +++ b/assembly/config/environment.yml @@ -7,4 +7,3 @@ channels: dependencies: - snakemake=7.32.4 - singularity=3.8.6 - - biopython diff --git a/assembly/run_assembly_pipeline.py b/assembly/run_assembly_pipeline.py index 490b900..388c616 100755 --- a/assembly/run_assembly_pipeline.py +++ b/assembly/run_assembly_pipeline.py @@ -2,6 +2,7 @@ import argparse import os +import yaml import sys def get_args(): @@ -11,17 +12,15 @@ def get_args(): # Parameter groups parser_io = parser.add_argument_group('INPUT/OUTPUT') - parser_cluster = parser.add_argument_group('CLUSTER CONFIGURATION (not implemented yet)') + parser_cluster = parser.add_argument_group('CLUSTER CONFIGURATION') # INPUT/OUTPUT - parser_io.add_argument('-s', '--samples', required=True, help='Absolute path to tab-separated table, no header, with sample name and path to fastq with HiFi reads.') + parser_io.add_argument('-s', '--samples', required=True, help='Path to tab-separeted table, no header, with sample name and path to fastq with HiFi reads.') - parser_io.add_argument('-o', '--outdir', required=True, help='Absolute path to output directory.') + parser_io.add_argument('-o', '--outdir', required=True, help='Output directory for the results.') - parser_io.add_argument('-n', '--dry_run', action='store_true', help='Do snakemake dry run.') - - # CLUSTER CONFIG (not implemented, would have to temper with the cluster config file) + # CLUSTER CONFIG parser_cluster.add_argument('-j', '--njobs', default='4', help='Number of jobs to run in parallel. [4]') parser_cluster.add_argument('-t', '--threads', default='10', help='Threads per job. [10]' ) @@ -37,7 +36,8 @@ def main(): # Infer pipeline location from path of run_assembly_pipeline.py pl_path = os.path.dirname(os.path.abspath(sys.argv[0])) - + print(pl_path) + # Directories for which singularity needs to be given access bind_dirs = [ "/scicore/home/gagneux/GROUP/tbresearch/genomes/IN_PROGRESS/PacBio_genomes/Gagneux", @@ -47,43 +47,24 @@ def main(): pl_path ] - # Infer folders with samples, to add them to bind_dirs - sample_dirs = set() - with open(args.samples) as f: - for line in f: - fields = line.strip().split() - fastq_path = fields[1] - fastq_dir = os.path.dirname(os.path.realpath(fastq_path)) - sample_dirs.add(fastq_dir) - - bind_dirs = bind_dirs + list(sample_dirs) - singularity_args = "--bind " + " --bind ".join(bind_dirs) - if args.dry_run: - - cmd = [ - "snakemake -n", - "--snakefile", pl_path + "/workflow/Snakefile", - "--directory", pl_path, - "--configfile", pl_path + "/config/config.yaml", - "--config", "samples=\"" + args.samples + "\"" + " outdir=\"" + args.outdir + "\"" - ] - - else: - cmd = [ - "snakemake", - "--snakefile", pl_path + "/workflow/Snakefile", - "--directory", pl_path, - "--configfile", pl_path + "/config/config.yaml", - "--profile", pl_path + "/cluster", - "--use-singularity", - "--singularity-args" + " \"" + singularity_args + "\"", - # Overwrite samples and outdir parameters in configfile - "--config", "samples=\"" + args.samples + "\"" + " outdir=\"" + args.outdir + "\"" - ] - - print("\n" + " ".join(cmd) + "\n") + cmd = [ + "snakemake", + "--snakefile", pl_path + "/workflow/Snakefile", + "--directory", pl_path, + "--configfile", pl_path + "/config/config.yaml", + "--profile", pl_path + "/config/cluster_config.yaml", + # Overwrite samples and outdir parameters + "--config", "samples=" + args.samples, + "--config", "outdir=" + args.outdir, + "--jobs", args.njobs, + "--cleanup-shadow", + "--use-singularity", + "--singularity-args" + " \"" + singularity_args + "\"" + ] + + #print(" ".join(cmd)) os.system(" ".join(cmd)) if __name__ == '__main__': -- GitLab