diff --git a/.gitignore b/.gitignore index 155279f82b41df9f3e30436c29c74f70191ce43f..479190f97f1a8c16a3319dceee47caf8be1a0178 100755 --- a/.gitignore +++ b/.gitignore @@ -7,5 +7,3 @@ assembly/resources/bakta_db facienda.md variantcalling/container/pggb_latest.sif variantcalling/.snakemake -assembly/logs -assembly/.fontconfig diff --git a/assembly/README.md b/assembly/README.md index 177b319bc29cad12ba7db7fdd5089842a3f0986d..8c339e5721c86fa4741a2b0c236bea8d961d7190 100755 --- a/assembly/README.md +++ b/assembly/README.md @@ -13,18 +13,14 @@ The user needs to provide two things to run the workflow on her samples: - a config file with some global options for the analysis - a tab separate table, without header, that contains the sample names and the corresponding paths to the HiFi consensus reads. - -## Clone the directory - - ## Create conda environment containing snakemake and singularity ``` -conda env create -f config/environment.yml +conda env create -f environment.yml ``` -## Run the pipeline +## config.yml In the file config/config.yaml some global parameters can be set: ```yaml diff --git a/assembly/cluster/config.yaml b/assembly/config/cluster_config.yaml similarity index 86% rename from assembly/cluster/config.yaml rename to assembly/config/cluster_config.yaml index 9deb95e863bcd5a6811ba91847f13cd99097dabe..a8eb21698259a77bfd2a075f95285bb9c955f5ad 100644 --- a/assembly/cluster/config.yaml +++ b/assembly/config/cluster_config.yaml @@ -9,16 +9,16 @@ cluster: --output=logs/{rule}/{rule}-{wildcards}-%j.stdout --error=logs/{rule}/{rule}-{wildcards}-%j.stderr default-resources: - - "partition=scicore" - - "qos='1day'" - - "time='12:00:00'" - - "mem_mb=20000" + - partition=scicore + - qos=1day + - time=12:00:00 + - mem_mb=20000 restart-times: 3 max-jobs-per-second: 10 max-status-checks-per-second: 1 local-cores: 1 latency-wait: 60 -jobs: 10 +jobs: 500 keep-going: True rerun-incomplete: True printshellcmds: True diff --git a/assembly/config/config.yaml 
b/assembly/config/config.yaml index ee2e9b6623ce042b85cf734465653384738b886a..eb11d3bea3047d9be05641541f294eb53ad152b0 100755 --- a/assembly/config/config.yaml +++ b/assembly/config/config.yaml @@ -2,17 +2,15 @@ # ############################## -samples: "config/samples.tsv" -outdir: "./results" +samples: config/samples.tsv +outdir: ./results ref: - genome_size: "4.4m" - gbf: "resources/H37Rv.gbf" + genome_size: 4.4m + gbf: resources/H37Rv.gbf -bakta_db: "/scicore/home/gagneux/GROUP/PacbioSnake_resources/databases/bakta_db" -container: "/scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/assemblySC.sif" - -annotate: "No" +bakta_db: /scicore/home/gagneux/GROUP/PacbioSnake_resources/databases/bakta_db +container: /scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/assemblySC.sif threads_per_job: 4 diff --git a/assembly/config/environment.yml b/assembly/config/environment.yml index 1946e195954f0eb9e6493dfdc05d9f64059563eb..f5e4dfb68a298668efd08fbd932411e9cf7f2d77 100644 --- a/assembly/config/environment.yml +++ b/assembly/config/environment.yml @@ -7,4 +7,3 @@ channels: dependencies: - snakemake=7.32.4 - singularity=3.8.6 - - biopython diff --git a/assembly/run_assembly_pipeline.py b/assembly/run_assembly_pipeline.py index 490b900ad352bcf6504fdb32d25e6665f0cc24c9..388c61689988a8ac2eeeb7f139005c6a9f74435e 100755 --- a/assembly/run_assembly_pipeline.py +++ b/assembly/run_assembly_pipeline.py @@ -2,6 +2,7 @@ import argparse import os +import yaml import sys def get_args(): @@ -11,17 +12,15 @@ def get_args(): # Parameter groups parser_io = parser.add_argument_group('INPUT/OUTPUT') - parser_cluster = parser.add_argument_group('CLUSTER CONFIGURATION (not implemented yet)') + parser_cluster = parser.add_argument_group('CLUSTER CONFIGURATION') # INPUT/OUTPUT - parser_io.add_argument('-s', '--samples', required=True, help='Absolute path to tab-separated table, no header, with sample name and path to fastq with HiFi reads.') + parser_io.add_argument('-s', 
'--samples', required=True, help='Path to tab-separated table, no header, with sample name and path to fastq with HiFi reads.') - parser_io.add_argument('-o', '--outdir', required=True, help='Absolute path to output directory.') + parser_io.add_argument('-o', '--outdir', required=True, help='Output directory for the results.') - parser_io.add_argument('-n', '--dry_run', action='store_true', help='Do snakemake dry run.') - - # CLUSTER CONFIG (not implemented, would have to temper with the cluster config file) + # CLUSTER CONFIG parser_cluster.add_argument('-j', '--njobs', default='4', help='Number of jobs to run in parallel. [4]') parser_cluster.add_argument('-t', '--threads', default='10', help='Threads per job. [10]' ) @@ -37,7 +36,8 @@ def main(): # Infer pipeline location from path of run_assembly_pipeline.py pl_path = os.path.dirname(os.path.abspath(sys.argv[0])) - + print(pl_path) + # Directories for which singularity needs to be given access bind_dirs = [ "/scicore/home/gagneux/GROUP/tbresearch/genomes/IN_PROGRESS/PacBio_genomes/Gagneux", @@ -47,43 +47,24 @@ def main(): pl_path ] - # Infer folders with samples, to add them to bind_dirs - sample_dirs = set() - with open(args.samples) as f: - for line in f: - fields = line.strip().split() - fastq_path = fields[1] - fastq_dir = os.path.dirname(os.path.realpath(fastq_path)) - sample_dirs.add(fastq_dir) - - bind_dirs = bind_dirs + list(sample_dirs) - singularity_args = "--bind " + " --bind ".join(bind_dirs) - if args.dry_run: - - cmd = [ - "snakemake -n", - "--snakefile", pl_path + "/workflow/Snakefile", - "--directory", pl_path, - "--configfile", pl_path + "/config/config.yaml", - "--config", "samples=\"" + args.samples + "\"" + " outdir=\"" + args.outdir + "\"" - ] - - else: - cmd = [ - "snakemake", - "--snakefile", pl_path + "/workflow/Snakefile", - "--directory", pl_path, - "--configfile", pl_path + "/config/config.yaml", - "--profile", pl_path + "/cluster", - "--use-singularity", - "--singularity-args" + " 
\"" + singularity_args + "\"", - # Overwrite samples and outdir parameters in configfile - "--config", "samples=\"" + args.samples + "\"" + " outdir=\"" + args.outdir + "\"" - ] - - print("\n" + " ".join(cmd) + "\n") + cmd = [ + "snakemake", + "--snakefile", pl_path + "/workflow/Snakefile", + "--directory", pl_path, + "--configfile", pl_path + "/config/config.yaml", + "--profile", pl_path + "/config/cluster_config.yaml", + # Overwrite samples and outdir parameters + "--config", "samples=" + args.samples, + "--config", "outdir=" + args.outdir, + "--jobs", args.njobs, + "--cleanup-shadow", + "--use-singularity", + "--singularity-args" + " \"" + singularity_args + "\"" + ] + + #print(" ".join(cmd)) os.system(" ".join(cmd)) if __name__ == '__main__':