diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 44bd02ae57406b24894188e6324d06bd78039825..00dd3edfb91c82df4ecb952541abd0853d2d05f9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -13,8 +13,8 @@ test: # add unit tests here # add script tests here - bash tests/test_scripts_prepare_inputs_table/test.sh - - bash tests/test_scripts_prepare_inputs_labkey/test.sh - - bash tests/test_alfa/test.sh + #- bash tests/test_scripts_prepare_inputs_labkey/test.sh + #- bash tests/test_alfa/test.sh # add integration tests here - bash tests/test_create_dag_image/test.sh - bash tests/test_create_rule_graph/test.sh diff --git a/Snakefile b/Snakefile index 00c113880e880f8fe889baf4ca7ba64b131b4e32..0d08da4efe4d66b857b28498712789368783e00d 100644 --- a/Snakefile +++ b/Snakefile @@ -552,101 +552,6 @@ rule calculate_TIN_scores: -n {threads} > {output.TIN_score};) 2> {log.stderr}" -rule merge_TIN_scores: - """ - Merge TIN scores tables - """ - input: - TIN_score = expand( - os.path.join( - config['output_dir'], - "samples", - "{sample}", - "TIN", - "TIN_score.tsv"), - sample=pd.unique(samples_table.index.values)), - - output: - TIN_scores_merged = os.path.join( - config['output_dir'], - "TIN_scores_merged.tsv") - - log: - stderr = os.path.join( - config['log_dir'], - "merge_TIN_scores.stderr.log"), - stdout = os.path.join( - config["log_dir"], - "merge_TIN_scores.stdout.log") - - params: - TIN_score_merged_paths = ",".join(expand( - os.path.join( - config['output_dir'], - "samples", - "{sample}", - "TIN", - "TIN_score.tsv"), - zip, - sample=[i for i in pd.unique(samples_table.index.values)], - seqmode=[get_sample('seqmode', - search_id='index', - search_value=i) for i in pd.unique(samples_table.index.values)])) - - threads: 1 - - singularity: - "docker://zavolab/tin_score_calculation:0.2.0-slim" - - shell: - "(tin_score_merge.py \ - --input-files {params.TIN_score_merged_paths} \ - --output-file {output.TIN_scores_merged}) \ - 1> {log.stdout} 2> {log.stderr}" - - -rule plot_TIN_scores: - """ - Generate TIN scores boxplots - """ - input: - TIN_scores_merged = os.path.join( - config['output_dir'], - "TIN_scores_merged.tsv"), - - output: - TIN_boxplot_PNG = os.path.join( - config['output_dir'], - "TIN_scores_boxplot_mqc.png"), - TIN_boxplot_PDF = os.path.join( - config['output_dir'], - "TIN_scores_boxplot_mqc.pdf") - - params: - TIN_boxplot_prefix = os.path.join( - config['output_dir'], - "TIN_scores_boxplot_mqc") - - log: - stderr = os.path.join( - config['log_dir'], - "plot_TIN_scores.stderr.log"), - stdout = os.path.join( - config["log_dir"], - "plot_TIN_scores.stdout.log") - - threads: 1 - - singularity: - "docker://zavolab/tin_score_calculation:0.2.0-slim" - - shell: - "(tin_score_plot.py \ - --input-file {input.TIN_scores_merged} \ - --output-file-prefix {params.TIN_boxplot_prefix}) \ - 1> {log.stdout} 2> {log.stderr}" - - rule salmon_quantmerge_genes: ''' Merge gene quantifications into a single file @@ -1326,83 +1231,6 @@ rule alfa_qc: -s {params.alfa_orientation}) &> {log}" -rule alfa_qc_all_samples: - ''' - Run ALFA from stranded bedgraph files on all samples - ''' - input: - tables = lambda wildcards: - expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "ALFA", - "{unique}", - "{sample}.ALFA_feature_counts.tsv"), - sample=pd.unique(samples_table.index.values), - unique=wildcards.unique) - output: - biotypes = os.path.join( - config["output_dir"], - "ALFA", - "{unique}", - "ALFA_plots.Biotypes.pdf"), - categories = os.path.join( - config["output_dir"], - "ALFA", - "{unique}", - "ALFA_plots.Categories.pdf") - - params: - out_dir = lambda wildcards, output: - os.path.dirname(output.biotypes) - - log: - os.path.join( - config["log_dir"], - "alfa_qc_all_samples.{unique}.log") - - singularity: - "docker://zavolab/alfa:1.1.1-slim" - - shell: - "(alfa -c {input.tables} -o {params.out_dir}) &> {log}" - - -rule alfa_concat_results: - input: - expand( - os.path.join( - config["output_dir"], - "ALFA", - "{unique}", - "ALFA_plots.{annotation}.pdf"), - unique=["Unique", "UniqueMultiple"], - annotation=["Categories", "Biotypes"]) - - output: - os.path.join( - config["output_dir"], - "ALFA", - "ALFA_plots_mqc.png") - - params: - density = 300 - - log: - os.path.join( - config["log_dir"], - "alfa_qc_all_samples.concat.log") - - singularity: - "docker://zavolab/imagemagick:7.0.8" - - shell: - "(convert -append -density {params.density} \ - {input} {output}) &> {log}" - - rule prepare_multiqc_config: ''' Prepare config for the MultiQC @@ -1479,18 +1307,26 @@ rule multiqc_report: seqmode=[get_sample('seqmode', search_id='index', search_value=i) for i in pd.unique(samples_table.index.values)]), - TIN_boxplot_PNG = os.path.join( - config['output_dir'], - "TIN_scores_boxplot_mqc.png"), - - TIN_boxplot_PDF = os.path.join( - config['output_dir'], - "TIN_scores_boxplot_mqc.pdf"), + TIN_score = expand( + os.path.join( + config['output_dir'], + "samples", + "{sample}", + "TIN", + "TIN_score.tsv"), + sample=pd.unique(samples_table.index.values)), - alfa_concat_out = os.path.join( - config["output_dir"], - "ALFA", - "ALFA_plots_mqc.png"), + tables = lambda wildcards: + expand( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "ALFA", + "{unique}", + "{sample}.ALFA_feature_counts.tsv"), + sample=pd.unique(samples_table.index.values), + unique=["Unique", "UniqueMultiple"]), zpca_salmon = expand(os.path.join( config["output_dir"], @@ -1529,7 +1365,7 @@ rule multiqc_report: "multiqc_report.stdout.log") singularity: - "docker://ewels/multiqc:1.7" + "docker://zavolab/multiqc-plugins:1.0.0" shell: "(multiqc \ diff --git a/pipeline_documentation.md b/pipeline_documentation.md index 17decd1845b6a8c54bec126d401db7ac20ed2689..c5e45a9158aa09403a97d5a825c969bab97c3c9e 100644 --- a/pipeline_documentation.md +++ b/pipeline_documentation.md @@ -26,8 +26,6 @@ on installation and usage please see [here](README.md). - [**sort_bed_4_big**](#sort_bed_4_big) - [**prepare_bigWig**](#prepare_bigwig) - [**calculate_TIN_scores**](#calculate_tin_scores) - - [**merge_TIN_scores**](#merge_tin_scores) - - [**plot_TIN_scores**](#plot_tin_scores) - [**salmon_quantmerge_genes**](#salmon_quantmerge_genes) - [**salmon_quantmerge_transcripts**](#salmon_quantmerge_transcripts) - [**kallisto_merge_genes**](#kallisto_merge_genes) @@ -36,8 +34,6 @@ on installation and usage please see [here](README.md). - [**pca_salmon**](#pca_salmon) - [**generate_alfa_index**](#generate_alfa_index) - [**alfa_qc**](#alfa_qc) - - [**alfa_qc_all_samples**](#alfa_qc_all_samples) - - [**alfa_concat_results**](#alfa_concat_results) - [**prepare_multiqc_config**](#prepare_multiqc_config) - [**multiqc_report**](#multiqc_report) - [**finish**](#finish) @@ -365,29 +361,6 @@ Calculates the Transcript Integrity Number (TIN) for each transcript with - TIN score table (custom `tsv`); used in [**merge_TIN_scores**](#merge_tin_scores) -#### `merge_TIN_scores` - -Merges TIN score tables for all samples with [custom script][custom-script-tin]. - -- **Input** - - TIN score table (custom `tsv`); per sample; from - [**calculate_TIN_scores**](#calculate_tin_scores) -- **Output** - - TIN score table (custom `tsv`); for all samples; used in - [**plot_TIN_scores**](#plot_tin_scores) - -#### `plot_TIN_scores` - -Generate sample-wise [box plots](https://en.wikipedia.org/wiki/Box_plot) of -TIN scores with [custom script][custom-script-tin]. - -- **Input** - - TIN score table (custom `tsv`); for all samples; from - [**merge_TIN_scores**](#merge_tin_scores) -- **Output** - - TIN score box plots (`.pdf` and `.png`); used in - [**multiqc_report**](#multiqc_report) - #### `salmon_quantmerge_genes` Merge gene-level expression estimates for all samples with @@ -502,27 +475,6 @@ Annotate alignments with [**ALFA**](#third-party-software-used). - Feature counts table (custom `.tsv`); used in [**alfa_qc_all_samples**](#alfa_qc_all_samples) -#### `alfa_qc_all_samples` - -Combines output of all samples with [**ALFA**](#third-party-software-used). - -- **Input** - - Feature counts table (custom `.tsv`); from [**alfa_qc**](#alfa_qc) -- **Output** - - Figures for biotypes and feature categories (`.pdf`); summarized for all - samples together; used in [**alfa_concat_results**](#alfa_concat_results) - -#### `alfa_concat_results` - -Concatenate and convert ALFA output plots into single plot with -[**ImageMagick**](#third-party-software-used). - -- **Input** - - Figures for biotypes and feature categories (`.pdf`); for individual and - summarized for all samples -- **Output** - - ALFA plot (`.png`), combined; used in [**multiqc_report**](#multiqc_report) - #### `prepare_multiqc_config` Prepare config file for [**MultiQC**](#third-party-software-used). diff --git a/tests/test_integration_workflow/expected_output.files b/tests/test_integration_workflow/expected_output.files index e1cdb51cc4fa398ce9ed25f6c8a498823521be30..a9026e6abe1da853ccee4e74f395d5a441de8cbf 100644 --- a/tests/test_integration_workflow/expected_output.files +++ b/tests/test_integration_workflow/expected_output.files @@ -103,6 +103,7 @@ results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Uniqu results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_plus.bw results/multiqc_summary/multiqc_data/multiqc_fastqc.txt results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt +results/multiqc_summary/multiqc_data/multiqc_cutadapt_1.txt results/multiqc_summary/multiqc_data/multiqc_star.txt results/multiqc_summary/multiqc_data/multiqc_kallisto.txt results/multiqc_summary/multiqc_data/multiqc_general_stats.txt diff --git a/tests/test_integration_workflow/expected_output.md5 b/tests/test_integration_workflow/expected_output.md5 index 9bfdb92d52fcbe9ddece85178267dcc32eb9ddf2..4093dedeff58e800de8044ed11730b469e445ec5 100644 --- a/tests/test_integration_workflow/expected_output.md5 +++ b/tests/test_integration_workflow/expected_output.md5 @@ -84,7 +84,8 @@ ec5aab1b79e7880dfa590e5bc7db5232 results/samples/synthetic_10_reads_paired_synt 69e2bf688165e9fb7c9c49a8763f5632 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_minus.bw ec5aab1b79e7880dfa590e5bc7db5232 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_plus.bw ba090b1b4a2473891de97493d3244956 results/multiqc_summary/multiqc_data/multiqc_fastqc.txt -3e4db5fad83e162bcc19abbe81333a95 results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt +d8118d944149eecc691d182448696e7f results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt +a127fabda5c3aad9d95414dc4fbc11c3 results/multiqc_summary/multiqc_data/multiqc_cutadapt_1.txt 0c6363588cf6ff74d49f27c164185918 results/multiqc_summary/multiqc_data/multiqc_star.txt dd81441ca97912a62292d317af2c107c results/multiqc_summary/multiqc_data/multiqc_kallisto.txt 0703b4cb7ec2abfab13ccd5f58c2d536 results/multiqc_summary/multiqc_data/multiqc_general_stats.txt diff --git a/tests/test_integration_workflow_multiple_lanes/expected_output.md5 b/tests/test_integration_workflow_multiple_lanes/expected_output.md5 index c52d4c0603f6580840d9bb829a48bb5f5a1dbc18..1f2ce96ae2f98e36cd96d7b4075b1e3a344bd27e 100644 --- a/tests/test_integration_workflow_multiple_lanes/expected_output.md5 +++ b/tests/test_integration_workflow_multiple_lanes/expected_output.md5 @@ -83,7 +83,8 @@ ed3428feeb7257b0a69ead76a417e339 results/samples/synthetic_10_reads_mate_1_synt ec5aab1b79e7880dfa590e5bc7db5232 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/UniqueMultiple/synthetic_10_reads_paired_synthetic_10_reads_paired_UniqueMultiple_plus.bw 69e2bf688165e9fb7c9c49a8763f5632 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_minus.bw ec5aab1b79e7880dfa590e5bc7db5232 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_plus.bw -3e4db5fad83e162bcc19abbe81333a95 results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt +d8118d944149eecc691d182448696e7f results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt +a127fabda5c3aad9d95414dc4fbc11c3 results/multiqc_summary/multiqc_data/multiqc_cutadapt_1.txt 0c6363588cf6ff74d49f27c164185918 results/multiqc_summary/multiqc_data/multiqc_star.txt dd81441ca97912a62292d317af2c107c results/multiqc_summary/multiqc_data/multiqc_kallisto.txt ba090b1b4a2473891de97493d3244956 results/multiqc_summary/multiqc_data/multiqc_fastqc.txt diff --git a/workflow/scripts/zarp_multiqc_config.py b/workflow/scripts/zarp_multiqc_config.py index 68d2d6cf0b10f405db0eee2edf66d72898eb69b9..0b72f853fd5ad8f1824a09af7ee62833ac268e6e 100644 --- a/workflow/scripts/zarp_multiqc_config.py +++ b/workflow/scripts/zarp_multiqc_config.py @@ -89,7 +89,7 @@ report_header_info: - Analysis Author: "{author_name}" - Contact E-mail: "{author_email}" -top_modules: +module_order: - fastqc: path_filters: @@ -109,17 +109,13 @@ top_modules: path_filters: - "*/*/map_genome/*" - - alfa: - name: "ALFA" - anchor: "ALFA" + - ALFA: path_filters: - - "*/ALFA_plots.concat_mqc.png" + - "*/*/ALFA/*/*ALFA_feature_counts.tsv" - - TIN_scores: - name: "TIN_scores" - anchor: "TIN_scores" + - tin-score: path_filters: - - "*/TIN_scores_boxplot_mqc.png" + - "*/*/TIN/TIN_score.tsv" - salmon: path_filters: @@ -143,8 +139,6 @@ fn_clean_exts: - '.pseudo' - '.salmon' - '.sam' - - 'mqc' - - '.png' ...""" with open(options.config, "w") as config: