From 6804ea67615b183abe80cdd03a0e359a8229d47e Mon Sep 17 00:00:00 2001 From: Dominik Burri <dominik.burri@unibas.ch> Date: Fri, 26 Feb 2021 15:56:27 +0100 Subject: [PATCH] Remove unnecessary files in the results directory --- Snakefile | 68 ++++++++++--------- tests/test_alfa/test.sh | 2 + tests/test_create_dag_image/test.sh | 2 + tests/test_create_rule_graph/test.sh | 2 + .../expected_output.md5 | 18 ----- tests/test_integration_workflow/test.local.sh | 2 + tests/test_integration_workflow/test.slurm.sh | 2 + .../expected_output.md5 | 18 ----- .../test.local.sh | 2 + .../test.slurm.sh | 2 + workflow/rules/paired_end.snakefile.smk | 55 +++++++++++---- workflow/rules/single_end.snakefile.smk | 47 ++++++++++--- 12 files changed, 127 insertions(+), 93 deletions(-) diff --git a/Snakefile b/Snakefile index 3ce2e93..46f802b 100644 --- a/Snakefile +++ b/Snakefile @@ -159,7 +159,7 @@ rule fastqc: shell: "(mkdir -p {output.outdir}; \ - fastqc --outdir {output.outdir} {input.reads}) \ + fastqc --outdir {output.outdir} --threads {threads} {input.reads}) \ 1> {log.stdout} 2> {log.stderr}" @@ -169,16 +169,16 @@ rule create_index_star: """ input: genome = lambda wildcards: - get_sample( + os.path.abspath(get_sample( 'genome', search_id='organism', - search_value=wildcards.organism), + search_value=wildcards.organism)), gtf = lambda wildcards: - get_sample( + os.path.abspath(get_sample( 'gtf', search_id='organism', - search_value=wildcards.organism) + search_value=wildcards.organism)) output: chromosome_info = os.path.join( @@ -250,11 +250,11 @@ rule extract_transcriptome: search_id='organism', search_value=wildcards.organism) output: - transcriptome = os.path.join( + transcriptome = temp(os.path.join( config['output_dir'], "transcriptome", "{organism}", - "transcriptome.fa") + "transcriptome.fa")) log: stderr = os.path.join( @@ -292,11 +292,11 @@ rule concatenate_transcriptome_and_genome: search_value=wildcards.organism) output: - genome_transcriptome = os.path.join( + genome_transcriptome = temp(os.path.join( config['output_dir'], "transcriptome", "{organism}", - "genome_transcriptome.fa") + "genome_transcriptome.fa")) singularity: "docker://bash:5.0.16" @@ -413,9 +413,9 @@ rule extract_transcripts_as_bed12: get_sample('gtf') output: - bed12 = os.path.join( + bed12 = temp(os.path.join( config['output_dir'], - "full_transcripts_protein_coding.bed") + "full_transcripts_protein_coding.bed")) singularity: "docker://zavolab/zgtf:0.1" @@ -515,12 +515,12 @@ rule calculate_TIN_scores: "full_transcripts_protein_coding.bed") output: - TIN_score = os.path.join( + TIN_score = temp(os.path.join( config['output_dir'], "samples", "{sample}", "TIN", - "TIN_score.tsv") + "TIN_score.tsv")) params: sample = "{sample}" @@ -937,30 +937,32 @@ rule star_rpm: search_value=wildcards.sample)) output: - str1 = os.path.join( + str1 = temp(os.path.join( config["output_dir"], "samples", "{sample}", "STAR_coverage", - "{sample}_Signal.Unique.str1.out.bg"), - str2 = os.path.join( + "{sample}_Signal.Unique.str1.out.bg")), + str2 = temp(os.path.join( config["output_dir"], "samples", "{sample}", "STAR_coverage", - "{sample}_Signal.UniqueMultiple.str1.out.bg"), - str3 = os.path.join( + "{sample}_Signal.UniqueMultiple.str1.out.bg")), + str3 = temp(os.path.join( config["output_dir"], "samples", "{sample}", "STAR_coverage", - "{sample}_Signal.Unique.str2.out.bg"), - str4 = os.path.join( + "{sample}_Signal.Unique.str2.out.bg")), + str4 = temp(os.path.join( config["output_dir"], "samples", "{sample}", "STAR_coverage", - "{sample}_Signal.UniqueMultiple.str2.out.bg") + "{sample}_Signal.UniqueMultiple.str2.out.bg")) + + shadow: "full" params: out_dir = lambda wildcards, output: @@ -1034,20 +1036,20 @@ rule rename_star_rpm_for_alfa: search_value=wildcards.sample)) output: - plus = os.path.join( + plus = temp(os.path.join( config["output_dir"], "samples", "{sample}", "ALFA", "{unique}", - "{sample}.{unique}.plus.bg"), - minus = os.path.join( + "{sample}.{unique}.plus.bg")), + minus = temp(os.path.join( config["output_dir"], "samples", "{sample}", "ALFA", "{unique}", - "{sample}.{unique}.minus.bg") + "{sample}.{unique}.minus.bg")) params: orientation = lambda wildcards: @@ -1081,10 +1083,10 @@ rule generate_alfa_index: ''' Generate ALFA index files from sorted GTF file ''' input: gtf = lambda wildcards: - get_sample( + os.path.abspath(get_sample( 'gtf', search_id='organism', - search_value=wildcards.organism), + search_value=wildcards.organism)), chr_len = os.path.join( config["star_indexes"], @@ -1164,20 +1166,20 @@ rule alfa_qc: "sorted_genes.stranded.ALFA_index") output: - biotypes = os.path.join( + biotypes = temp(os.path.join( config["output_dir"], "samples", "{sample}", "ALFA", "{unique}", - "ALFA_plots.Biotypes.pdf"), - categories = os.path.join( + "ALFA_plots.Biotypes.pdf")), + categories = temp(os.path.join( config["output_dir"], "samples", "{sample}", "ALFA", "{unique}", - "ALFA_plots.Categories.pdf"), + "ALFA_plots.Categories.pdf")), table = os.path.join( config["output_dir"], "samples", @@ -1382,13 +1384,13 @@ rule sort_bed_4_big: "{sample}.{unique}.{strand}.bg") output: - sorted_bg = os.path.join( + sorted_bg = temp(os.path.join( config["output_dir"], "samples", "{sample}", "bigWig", "{unique}", - "{sample}_{unique}_{strand}.sorted.bg") + "{sample}_{unique}_{strand}.sorted.bg")) singularity: "docker://cjh4zavolab/bedtools:2.27" diff --git a/tests/test_alfa/test.sh b/tests/test_alfa/test.sh index 1472685..5389734 100755 --- a/tests/test_alfa/test.sh +++ b/tests/test_alfa/test.sh @@ -33,6 +33,8 @@ snakemake \ --use-singularity \ --singularity-args="--bind ${PWD}/../input_files,${PWD}/../../images" \ --verbose \ + --notemp \ + --no-hooks \ results/ALFA/ALFA_plots_mqc.png # Check md5 sum of some output files diff --git a/tests/test_create_dag_image/test.sh b/tests/test_create_dag_image/test.sh index afd8de4..1c954dd 100755 --- a/tests/test_create_dag_image/test.sh +++ b/tests/test_create_dag_image/test.sh @@ -26,5 +26,7 @@ snakemake \ --printshellcmds \ --dryrun \ --verbose \ + --notemp \ + --no-hooks \ | dot -Tsvg > "../../images/dag_test_workflow.svg" diff --git a/tests/test_create_rule_graph/test.sh b/tests/test_create_rule_graph/test.sh index 51e5383..287e08d 100755 --- a/tests/test_create_rule_graph/test.sh +++ b/tests/test_create_rule_graph/test.sh @@ -26,5 +26,7 @@ snakemake \ --printshellcmds \ --dryrun \ --verbose \ + --notemp \ + --no-hooks \ | dot -Tsvg > "../../images/rule_graph.svg" diff --git a/tests/test_integration_workflow/expected_output.md5 b/tests/test_integration_workflow/expected_output.md5 index 9f1b02d..4093ded 100644 --- a/tests/test_integration_workflow/expected_output.md5 +++ b/tests/test_integration_workflow/expected_output.md5 @@ -20,7 +20,6 @@ ea36f062eedc7f54ceffea2b635a25a8 results/star_indexes/homo_sapiens/75/STAR_inde 500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.remove_polya_mate1.fastq e90e31db1ce51d930645eb74ff70d21b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.remove_adapters_mate2.fastq 1c0796d7e0bdab0e99780b2e11d80c19 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.remove_polya_mate2.fastq -d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.SJ.out.tab 9896744dd90ff3eef00c91fa1f721366 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/fastqc_data.txt 6946ba80af318b9c1052b264dc674a51 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/fastqc.fo 2603f3031242e97411a71571f6ad9e53 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/summary.txt @@ -45,18 +44,9 @@ b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_paired_synt 69b70e3f561b749bf10b186dd2480a8a results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_sequence_quality.png b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_tile_quality.png 5b950b5dfe3c7407e9aac153db330a38 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/sequence_length_distribution.png -2e77276535976efccb244627231624bf results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/abundance.tsv -d013650f813b815a790c9e6a51c7559b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/pseudoalignments.bam d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.kallisto.pseudo.sam -981b59830d74d300bb5dd3e602e0d86f results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/lib_format_counts.json -989d6ee63b728fced9ec0249735ab83d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/aux_info/ambig_info.tsv -3407f87245d0003e0ffbfdf6d8c04f20 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/aux_info/expected_bias -92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/aux_info/observed_bias -92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/aux_info/observed_bias_3p -d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/aux_info/unmapped_names.txt 500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.remove_adapters_mate1.fastq 500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.remove_polya_mate1.fastq -d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.SJ.out.tab fdb8c6ddd39b606414b2785d6ec2da8a results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/fastqc_data.txt 3cb70940acdcca512207bd8613085538 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/fastqc.fo fc276a1711cc35f7a9d5328bdbbab810 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/summary.txt @@ -69,15 +59,7 @@ e4c1a39967ec9547a2e4c71c97982ee0 results/samples/synthetic_10_reads_mate_1_synt 69b70e3f561b749bf10b186dd2480a8a results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_sequence_quality.png b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_tile_quality.png 5b950b5dfe3c7407e9aac153db330a38 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/sequence_length_distribution.png -50a9b89a9f1da2c438cb0041b64faa0e results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/abundance.tsv -fd8242418230a4edb33350be2e4f1d78 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/pseudoalignments.bam d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.kallisto.pseudo.sam -d6ae863b39ca6ec5d0f63c03036f9dda results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/lib_format_counts.json -989d6ee63b728fced9ec0249735ab83d results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/aux_info/ambig_info.tsv -3407f87245d0003e0ffbfdf6d8c04f20 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/aux_info/expected_bias -92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/aux_info/observed_bias -92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/aux_info/observed_bias_3p -d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/aux_info/unmapped_names.txt 3ce47cb1d62482c5d62337751d7e8552 results/transcriptome/homo_sapiens/transcriptome.fa 6b44c507f0a1c9f7369db0bb1deef0fd results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.stranded.ALFA_index 2caebc23faf78fdbbbdbb118d28bd6b5 results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.unstranded.ALFA_index diff --git a/tests/test_integration_workflow/test.local.sh b/tests/test_integration_workflow/test.local.sh index 883424d..69b179d 100755 --- a/tests/test_integration_workflow/test.local.sh +++ b/tests/test_integration_workflow/test.local.sh @@ -33,6 +33,8 @@ snakemake \ --rerun-incomplete \ --use-singularity \ --singularity-args="--bind ${PWD}/../input_files,${PWD}/../../images" \ + --notemp \ + --no-hooks \ --verbose # Create a Snakemake report after the workflow execution diff --git a/tests/test_integration_workflow/test.slurm.sh b/tests/test_integration_workflow/test.slurm.sh index 0145ae7..c6c8410 100755 --- a/tests/test_integration_workflow/test.slurm.sh +++ b/tests/test_integration_workflow/test.slurm.sh @@ -35,6 +35,8 @@ snakemake \ --rerun-incomplete \ --use-singularity \ --singularity-args="--bind ${PWD}/../input_files,${PWD}/../../images" \ + --notemp \ + --no-hooks \ --verbose # Create a Snakemake report after the workflow execution diff --git a/tests/test_integration_workflow_multiple_lanes/expected_output.md5 b/tests/test_integration_workflow_multiple_lanes/expected_output.md5 index 0f264a7..1f2ce96 100644 --- a/tests/test_integration_workflow_multiple_lanes/expected_output.md5 +++ b/tests/test_integration_workflow_multiple_lanes/expected_output.md5 @@ -20,7 +20,6 @@ ea36f062eedc7f54ceffea2b635a25a8 results/star_indexes/homo_sapiens/75/STAR_inde 500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.remove_polya_mate1.fastq e90e31db1ce51d930645eb74ff70d21b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.remove_adapters_mate2.fastq 1c0796d7e0bdab0e99780b2e11d80c19 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.remove_polya_mate2.fastq -d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.SJ.out.tab 9896744dd90ff3eef00c91fa1f721366 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/fastqc_data.txt 6946ba80af318b9c1052b264dc674a51 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/fastqc.fo 2603f3031242e97411a71571f6ad9e53 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/summary.txt @@ -45,18 +44,9 @@ b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_paired_synt 69b70e3f561b749bf10b186dd2480a8a results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_sequence_quality.png b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_tile_quality.png 5b950b5dfe3c7407e9aac153db330a38 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/sequence_length_distribution.png -2e77276535976efccb244627231624bf results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/abundance.tsv -d013650f813b815a790c9e6a51c7559b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/pseudoalignments.bam d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.kallisto.pseudo.sam -981b59830d74d300bb5dd3e602e0d86f results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/lib_format_counts.json -989d6ee63b728fced9ec0249735ab83d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/aux_info/ambig_info.tsv -3407f87245d0003e0ffbfdf6d8c04f20 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/aux_info/expected_bias -92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/aux_info/observed_bias -92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/aux_info/observed_bias_3p -d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/aux_info/unmapped_names.txt 500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.remove_adapters_mate1.fastq 500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.remove_polya_mate1.fastq -d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.SJ.out.tab fdb8c6ddd39b606414b2785d6ec2da8a results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/fastqc_data.txt 3cb70940acdcca512207bd8613085538 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/fastqc.fo fc276a1711cc35f7a9d5328bdbbab810 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/summary.txt @@ -69,15 +59,7 @@ e4c1a39967ec9547a2e4c71c97982ee0 results/samples/synthetic_10_reads_mate_1_synt 69b70e3f561b749bf10b186dd2480a8a results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_sequence_quality.png b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_tile_quality.png 5b950b5dfe3c7407e9aac153db330a38 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/sequence_length_distribution.png -50a9b89a9f1da2c438cb0041b64faa0e results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/abundance.tsv -fd8242418230a4edb33350be2e4f1d78 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/pseudoalignments.bam d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.kallisto.pseudo.sam -d6ae863b39ca6ec5d0f63c03036f9dda results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/lib_format_counts.json -989d6ee63b728fced9ec0249735ab83d results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/aux_info/ambig_info.tsv -3407f87245d0003e0ffbfdf6d8c04f20 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/aux_info/expected_bias -92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/aux_info/observed_bias -92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/aux_info/observed_bias_3p -d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/aux_info/unmapped_names.txt 3ce47cb1d62482c5d62337751d7e8552 results/transcriptome/homo_sapiens/transcriptome.fa 6b44c507f0a1c9f7369db0bb1deef0fd results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.stranded.ALFA_index 2caebc23faf78fdbbbdbb118d28bd6b5 results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.unstranded.ALFA_index diff --git a/tests/test_integration_workflow_multiple_lanes/test.local.sh b/tests/test_integration_workflow_multiple_lanes/test.local.sh index 018b47d..fd45fa4 100755 --- a/tests/test_integration_workflow_multiple_lanes/test.local.sh +++ b/tests/test_integration_workflow_multiple_lanes/test.local.sh @@ -33,6 +33,8 @@ snakemake \ --rerun-incomplete \ --use-singularity \ --singularity-args="--bind ${PWD}/../input_files,${PWD}/../../images" \ + --notemp \ + --no-hooks \ --verbose # Create a Snakemake report after the workflow execution diff --git a/tests/test_integration_workflow_multiple_lanes/test.slurm.sh b/tests/test_integration_workflow_multiple_lanes/test.slurm.sh index f2dd459..74ba028 100755 --- a/tests/test_integration_workflow_multiple_lanes/test.slurm.sh +++ b/tests/test_integration_workflow_multiple_lanes/test.slurm.sh @@ -35,6 +35,8 @@ snakemake \ --rerun-incomplete \ --use-singularity \ --singularity-args="--bind ${PWD}/../input_files,${PWD}/../../images" \ + --notemp \ + --no-hooks \ --verbose # Create a Snakemake report after the workflow execution diff --git a/workflow/rules/paired_end.snakefile.smk b/workflow/rules/paired_end.snakefile.smk index 25046d0..1777e2c 100644 --- a/workflow/rules/paired_end.snakefile.smk +++ b/workflow/rules/paired_end.snakefile.smk @@ -18,16 +18,16 @@ rule pe_remove_adapters_cutadapt: "{sample}.fq2.fastq.gz"), output: - reads1 = os.path.join( + reads1 = temp(os.path.join( config["output_dir"], "samples", "{sample}", - "{sample}.pe.remove_adapters_mate1.fastq.gz"), - reads2 = os.path.join( + "{sample}.pe.remove_adapters_mate1.fastq.gz")), + reads2 = temp(os.path.join( config["output_dir"], "samples", "{sample}", - "{sample}.pe.remove_adapters_mate2.fastq.gz") + "{sample}.pe.remove_adapters_mate2.fastq.gz")) params: adapter_3_mate1 = lambda wildcards: @@ -91,16 +91,16 @@ rule pe_remove_polya_cutadapt: "{sample}.pe.remove_adapters_mate2.fastq.gz") output: - reads1 = os.path.join( + reads1 = temp(os.path.join( config["output_dir"], "samples", "{sample}", - "{sample}.pe.remove_polya_mate1.fastq.gz"), - reads2 = os.path.join( + "{sample}.pe.remove_polya_mate1.fastq.gz")), + reads2 = temp(os.path.join( config["output_dir"], "samples", "{sample}", - "{sample}.pe.remove_polya_mate2.fastq.gz") + "{sample}.pe.remove_polya_mate2.fastq.gz")) params: polya_3_mate1 = lambda wildcards: @@ -203,10 +203,12 @@ rule pe_map_genome_star: "map_genome", "{sample}.pe.Log.final.out") + shadow: "minimal" + params: sample_id = "{sample}", index = lambda wildcards: - os.path.join( + os.path.abspath(os.path.join( config["star_indexes"], get_sample( 'organism', @@ -216,7 +218,7 @@ rule pe_map_genome_star: 'index_size', search_id='index', search_value=wildcards.sample), - "STAR_index"), + "STAR_index")), outFileNamePrefix = os.path.join( config["output_dir"], "samples", @@ -292,10 +294,10 @@ rule pe_quantification_salmon: "{sample}", "{sample}.pe.remove_polya_mate2.fastq.gz"), gtf = lambda wildcards: - get_sample( + os.path.abspath(get_sample( 'gtf', search_id='index', - search_value=wildcards.sample), + search_value=wildcards.sample)), index = lambda wildcards: os.path.join( config["salmon_indexes"], @@ -321,7 +323,23 @@ rule pe_quantification_salmon: "samples", "{sample}", "{sample}.salmon.pe", - "quant.sf") + "quant.sf"), + meta_info = os.path.join( + config["output_dir"], + "samples", + "{sample}", + "{sample}.salmon.pe", + "aux_info", + "meta_info.json"), + flenDist = os.path.join( + config["output_dir"], + "samples", + "{sample}", + "{sample}.salmon.pe", + "libParams", + "flenDist.txt") + + shadow: "minimal" params: output_dir = os.path.join( @@ -397,7 +415,15 @@ rule pe_genome_quantification_kallisto: "samples", "{sample}", "quant_kallisto", - "{sample}.pe.kallisto.pseudo.sam") + "{sample}.pe.kallisto.pseudo.sam"), + abundances = os.path.join( + config["output_dir"], + "samples", + "{sample}", + "quant_kallisto", + "abundance.h5") + + shadow: "minimal" params: output_dir = os.path.join( @@ -428,6 +454,7 @@ rule pe_genome_quantification_kallisto: -i {input.index} \ -o {params.output_dir} \ --pseudobam \ + -t {threads} \ {params.directionality}-stranded \ {input.reads1} {input.reads2} > {output.pseudoalignment}) \ 2> {log.stderr}" diff --git a/workflow/rules/single_end.snakefile.smk b/workflow/rules/single_end.snakefile.smk index 071b9e5..af71c97 100644 --- a/workflow/rules/single_end.snakefile.smk +++ b/workflow/rules/single_end.snakefile.smk @@ -11,11 +11,11 @@ rule remove_adapters_cutadapt: "{sample}.fq1.fastq.gz") output: - reads = os.path.join( + reads = temp(os.path.join( config["output_dir"], "samples", "{sample}", - "{sample}.se.remove_adapters_mate1.fastq.gz") + "{sample}.se.remove_adapters_mate1.fastq.gz")) params: adapters_3 = lambda wildcards: @@ -70,11 +70,11 @@ rule remove_polya_cutadapt: "{sample}.se.remove_adapters_mate1.fastq.gz") output: - reads = os.path.join( + reads = temp(os.path.join( config["output_dir"], "samples", "{sample}", - "{sample}.se.remove_polya_mate1.fastq.gz") + "{sample}.se.remove_polya_mate1.fastq.gz")) params: polya_3 = lambda wildcards: @@ -151,14 +151,16 @@ rule map_genome_star: "map_genome", "{sample}.se.Log.final.out") + shadow: "minimal" + params: sample_id = "{sample}", index = lambda wildcards: - os.path.join( + os.path.abspath(os.path.join( config["star_indexes"], get_sample('organism', search_id='index', search_value=wildcards.sample), get_sample('index_size', search_id='index', search_value=wildcards.sample), - "STAR_index"), + "STAR_index")), outFileNamePrefix = os.path.join( config["output_dir"], "samples", @@ -241,10 +243,10 @@ rule quantification_salmon: search_value=wildcards.sample), "salmon.idx"), gtf = lambda wildcards: - get_sample( + os.path.abspath(get_sample( 'gtf', search_id='index', - search_value=wildcards.sample) + search_value=wildcards.sample)) output: gn_estimates = os.path.join( @@ -258,7 +260,23 @@ rule quantification_salmon: "samples", "{sample}", "{sample}.salmon.se", - "quant.sf") + "quant.sf"), + meta_info = os.path.join( + config["output_dir"], + "samples", + "{sample}", + "{sample}.salmon.se", + "aux_info", + "meta_info.json"), + flenDist = os.path.join( + config["output_dir"], + "samples", + "{sample}", + "{sample}.salmon.se", + "libParams", + "flenDist.txt") + + shadow: "minimal" params: output_dir = os.path.join( @@ -339,7 +357,15 @@ rule genome_quantification_kallisto: "samples", "{sample}", "quant_kallisto", - "{sample}.se.kallisto.pseudo.sam") + "{sample}.se.kallisto.pseudo.sam"), + abundances = os.path.join( + config["output_dir"], + "samples", + "{sample}", + "quant_kallisto", + "abundance.h5") + + shadow: "minimal" params: output_dir = os.path.join( @@ -383,6 +409,7 @@ rule genome_quantification_kallisto: -l {params.fraglen} \ -s {params.fragsd} \ --pseudobam \ + -t {threads} \ {params.directionality}-stranded \ {input.reads} > {output.pseudoalignment};) \ 2> {log.stderr}" -- GitLab