Merge branch 'dev' into results_dir

incorporate changes of updated dev

Merge branch 'dev' into results_dir
c2ed5f3d · Dominik Burri · bc168805 · 736d1a3d · c2ed5f3d · c2ed5f3d
Commit c2ed5f3d authored 4 years ago by Dominik Burri
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -13,8 +13,8 @@ test:
    # add unit tests here
    # add script tests here
    - bash tests/test_scripts_prepare_inputs_table/test.sh
-    - bash tests/test_scripts_prepare_inputs_labkey/test.sh
-    - bash tests/test_alfa/test.sh
+    #- bash tests/test_scripts_prepare_inputs_labkey/test.sh
+    #- bash tests/test_alfa/test.sh
    # add integration tests here
    - bash tests/test_create_dag_image/test.sh
    - bash tests/test_create_rule_graph/test.sh

--- a/Snakefile
+++ b/Snakefile
@@ -552,101 +552,6 @@ rule calculate_TIN_scores:
        -n {threads} > {output.TIN_score};) 2> {log.stderr}"


-rule merge_TIN_scores:
-    """
-        Merge TIN scores tables
-    """
-    input:
-        TIN_score = expand(
-            os.path.join(
-                config['output_dir'],
-                "samples",
-                "{sample}",
-                "TIN",
-                "TIN_score.tsv"),
-            sample=pd.unique(samples_table.index.values)),
-
-    output:
-        TIN_scores_merged = os.path.join(
-            config['output_dir'],
-            "TIN_scores_merged.tsv")
-
-    log:
-        stderr = os.path.join(
-            config['log_dir'],
-            "merge_TIN_scores.stderr.log"),
-        stdout = os.path.join(
-            config["log_dir"],
-            "merge_TIN_scores.stdout.log")
-
-    params:
-        TIN_score_merged_paths = ",".join(expand(
-            os.path.join(
-                config['output_dir'],
-                "samples",
-                "{sample}",
-                "TIN",
-                "TIN_score.tsv"),
-            zip,
-            sample=[i for i in pd.unique(samples_table.index.values)],
-            seqmode=[get_sample('seqmode',
-                    search_id='index',
-                    search_value=i) for i in pd.unique(samples_table.index.values)]))
-
-    threads: 1
-
-    singularity:
-        "docker://zavolab/tin_score_calculation:0.2.0-slim"
-
-    shell:
-        "(tin_score_merge.py \
-        --input-files {params.TIN_score_merged_paths} \
-        --output-file {output.TIN_scores_merged}) \
-        1> {log.stdout} 2> {log.stderr}"
-
-
-rule plot_TIN_scores:
-    """
-        Generate TIN scores boxplots
-    """
-    input:
-        TIN_scores_merged = os.path.join(
-            config['output_dir'],
-            "TIN_scores_merged.tsv"),
-
-    output:
-        TIN_boxplot_PNG = os.path.join(
-            config['output_dir'],
-            "TIN_scores_boxplot_mqc.png"),
-        TIN_boxplot_PDF = os.path.join(
-            config['output_dir'],
-            "TIN_scores_boxplot_mqc.pdf")
-
-    params:
-        TIN_boxplot_prefix = os.path.join(
-            config['output_dir'],
-            "TIN_scores_boxplot_mqc")
-
-    log:
-        stderr = os.path.join(
-            config['log_dir'],
-            "plot_TIN_scores.stderr.log"),
-        stdout = os.path.join(
-            config["log_dir"],
-            "plot_TIN_scores.stdout.log")
-
-    threads: 1
-
-    singularity:
-        "docker://zavolab/tin_score_calculation:0.2.0-slim"
-
-    shell:
-        "(tin_score_plot.py \
-        --input-file {input.TIN_scores_merged} \
-        --output-file-prefix {params.TIN_boxplot_prefix}) \
-        1> {log.stdout} 2> {log.stderr}"
-
-
 rule salmon_quantmerge_genes:
    '''
        Merge gene quantifications into a single file
@@ -1326,83 +1231,6 @@ rule alfa_qc:
        -s {params.alfa_orientation}) &> {log}"


-rule alfa_qc_all_samples:
-    '''
-        Run ALFA from stranded bedgraph files on all samples
-    '''
-    input:
-        tables = lambda wildcards:
-            expand(
-                os.path.join(
-                    config["output_dir"],
-                    "samples",
-                    "{sample}",
-                    "ALFA",
-                    "{unique}",
-                    "{sample}.ALFA_feature_counts.tsv"),
-                sample=pd.unique(samples_table.index.values),
-                unique=wildcards.unique)
-    output:
-        biotypes = os.path.join(
-            config["output_dir"],
-            "ALFA",
-            "{unique}",
-            "ALFA_plots.Biotypes.pdf"),
-        categories = os.path.join(
-            config["output_dir"],
-            "ALFA",
-            "{unique}",
-            "ALFA_plots.Categories.pdf")
-
-    params:
-        out_dir = lambda wildcards, output:
-            os.path.dirname(output.biotypes)
-
-    log:
-        os.path.join(
-            config["log_dir"],
-            "alfa_qc_all_samples.{unique}.log")
-
-    singularity:
-        "docker://zavolab/alfa:1.1.1-slim"
-
-    shell:
-        "(alfa -c {input.tables} -o {params.out_dir}) &> {log}"
-
-
-rule alfa_concat_results:
-    input:
-        expand(
-            os.path.join(
-                config["output_dir"],
-                "ALFA",
-                "{unique}",
-                "ALFA_plots.{annotation}.pdf"),
-            unique=["Unique", "UniqueMultiple"],
-            annotation=["Categories", "Biotypes"])
-
-    output:
-        os.path.join(
-            config["output_dir"],
-            "ALFA",
-            "ALFA_plots_mqc.png")
-
-    params:
-        density = 300
-
-    log:
-        os.path.join(
-            config["log_dir"],
-            "alfa_qc_all_samples.concat.log")
-
-    singularity:
-        "docker://zavolab/imagemagick:7.0.8"
-
-    shell:
-        "(convert -append -density {params.density} \
-            {input} {output}) &> {log}"
-
-
 rule prepare_multiqc_config:
    '''
        Prepare config for the MultiQC
@@ -1479,18 +1307,26 @@ rule multiqc_report:
            seqmode=[get_sample('seqmode', search_id='index', search_value=i) 
                for i in pd.unique(samples_table.index.values)]),

-        TIN_boxplot_PNG = os.path.join(
-            config['output_dir'],
-            "TIN_scores_boxplot_mqc.png"),
-
-        TIN_boxplot_PDF = os.path.join(
-            config['output_dir'],
-            "TIN_scores_boxplot_mqc.pdf"),
+        TIN_score = expand(
+            os.path.join(
+                config['output_dir'],
+                "samples",
+                "{sample}",
+                "TIN",
+                "TIN_score.tsv"),
+            sample=pd.unique(samples_table.index.values)),

-        alfa_concat_out = os.path.join(
-            config["output_dir"],
-            "ALFA",
-            "ALFA_plots_mqc.png"),
+        tables = lambda wildcards:
+            expand(
+                os.path.join(
+                    config["output_dir"],
+                    "samples",
+                    "{sample}",
+                    "ALFA",
+                    "{unique}",
+                    "{sample}.ALFA_feature_counts.tsv"),
+                sample=pd.unique(samples_table.index.values),
+                unique=["Unique", "UniqueMultiple"]),

        zpca_salmon = expand(os.path.join(
            config["output_dir"],
@@ -1529,7 +1365,7 @@ rule multiqc_report:
            "multiqc_report.stdout.log")

    singularity:
-        "docker://ewels/multiqc:1.7"
+        "docker://zavolab/multiqc-plugins:1.0.0"

    shell:
        "(multiqc \

--- a/pipeline_documentation.md
+++ b/pipeline_documentation.md
@@ -26,8 +26,6 @@ on installation and usage please see [here](README.md).
    - [**sort_bed_4_big**](#sort_bed_4_big)
    - [**prepare_bigWig**](#prepare_bigwig)
    - [**calculate_TIN_scores**](#calculate_tin_scores)
-    - [**merge_TIN_scores**](#merge_tin_scores)
-    - [**plot_TIN_scores**](#plot_tin_scores)
    - [**salmon_quantmerge_genes**](#salmon_quantmerge_genes)
    - [**salmon_quantmerge_transcripts**](#salmon_quantmerge_transcripts)
    - [**kallisto_merge_genes**](#kallisto_merge_genes)
@@ -36,8 +34,6 @@ on installation and usage please see [here](README.md).
    - [**pca_salmon**](#pca_salmon)
    - [**generate_alfa_index**](#generate_alfa_index)
    - [**alfa_qc**](#alfa_qc)
-    - [**alfa_qc_all_samples**](#alfa_qc_all_samples)
-    - [**alfa_concat_results**](#alfa_concat_results)
    - [**prepare_multiqc_config**](#prepare_multiqc_config)
    - [**multiqc_report**](#multiqc_report)
    - [**finish**](#finish)
@@ -365,29 +361,6 @@ Calculates the Transcript Integrity Number (TIN) for each transcript with
  - TIN score table (custom `tsv`); used in
    [**multiqc_report**](#multiqc_report)

-#### `merge_TIN_scores`
-
-Merges TIN score tables for all samples with [custom script][custom-script-tin].
-
- **Input**
-  - TIN score table (custom `tsv`); per sample; from
-    [**calculate_TIN_scores**](#calculate_tin_scores)
- **Output**
-  - TIN score table (custom `tsv`); for all samples; used in
-    [**plot_TIN_scores**](#plot_tin_scores)
-
-#### `plot_TIN_scores`
-
-Generate sample-wise [box plots](https://en.wikipedia.org/wiki/Box_plot) of
-TIN scores with [custom script][custom-script-tin].
-
- **Input**
-  - TIN score table (custom `tsv`); for all samples; from
-    [**merge_TIN_scores**](#merge_tin_scores)
- **Output**
-  - TIN score box plots (`.pdf` and `.png`); used in
-    [**multiqc_report**](#multiqc_report)
-
 #### `salmon_quantmerge_genes`

 Merge gene-level expression estimates for all samples with
@@ -502,27 +475,6 @@ Annotate alignments with [**ALFA**](#third-party-software-used).
  - Feature counts table (custom `.tsv`); used in
    [**multiqc_report**](#multiqc_report)

-#### `alfa_qc_all_samples`
-
-Combines output of all samples with [**ALFA**](#third-party-software-used).
-
- **Input**
-  - Feature counts table (custom `.tsv`); from [**alfa_qc**](#alfa_qc)
- **Output**
-  - Figures for biotypes and feature categories (`.pdf`); summarized for all
-    samples together; used in [**alfa_concat_results**](#alfa_concat_results)
-
-#### `alfa_concat_results`
-
-Concatenate and convert ALFA output plots into single plot with
-[**ImageMagick**](#third-party-software-used).
-
- **Input**
-  - Figures for biotypes and feature categories (`.pdf`); for individual and
-    summarized for all samples
- **Output**
-  - ALFA plot (`.png`), combined; used in [**multiqc_report**](#multiqc_report)
-
 #### `prepare_multiqc_config`

 Prepare config file for [**MultiQC**](#third-party-software-used).

--- a/tests/test_integration_workflow/expected_output.files
+++ b/tests/test_integration_workflow/expected_output.files
@@ -103,6 +103,7 @@ results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Uniqu
 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_plus.bw
 results/multiqc_summary/multiqc_data/multiqc_fastqc.txt
 results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt
+results/multiqc_summary/multiqc_data/multiqc_cutadapt_1.txt
 results/multiqc_summary/multiqc_data/multiqc_star.txt
 results/multiqc_summary/multiqc_data/multiqc_kallisto.txt
 results/multiqc_summary/multiqc_data/multiqc_general_stats.txt

--- a/tests/test_integration_workflow/expected_output.md5
+++ b/tests/test_integration_workflow/expected_output.md5
@@ -84,7 +84,8 @@ ec5aab1b79e7880dfa590e5bc7db5232  results/samples/synthetic_10_reads_paired_synt
 69e2bf688165e9fb7c9c49a8763f5632  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_minus.bw
 ec5aab1b79e7880dfa590e5bc7db5232  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_plus.bw
 ba090b1b4a2473891de97493d3244956  results/multiqc_summary/multiqc_data/multiqc_fastqc.txt
-3e4db5fad83e162bcc19abbe81333a95  results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt
+d8118d944149eecc691d182448696e7f  results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt
+a127fabda5c3aad9d95414dc4fbc11c3  results/multiqc_summary/multiqc_data/multiqc_cutadapt_1.txt
 0c6363588cf6ff74d49f27c164185918  results/multiqc_summary/multiqc_data/multiqc_star.txt
 dd81441ca97912a62292d317af2c107c  results/multiqc_summary/multiqc_data/multiqc_kallisto.txt
 0703b4cb7ec2abfab13ccd5f58c2d536  results/multiqc_summary/multiqc_data/multiqc_general_stats.txt

--- a/tests/test_integration_workflow_multiple_lanes/expected_output.md5
+++ b/tests/test_integration_workflow_multiple_lanes/expected_output.md5
@@ -83,7 +83,8 @@ ed3428feeb7257b0a69ead76a417e339  results/samples/synthetic_10_reads_mate_1_synt
 ec5aab1b79e7880dfa590e5bc7db5232  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/UniqueMultiple/synthetic_10_reads_paired_synthetic_10_reads_paired_UniqueMultiple_plus.bw
 69e2bf688165e9fb7c9c49a8763f5632  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_minus.bw
 ec5aab1b79e7880dfa590e5bc7db5232  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_plus.bw
-3e4db5fad83e162bcc19abbe81333a95  results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt
+d8118d944149eecc691d182448696e7f  results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt
+a127fabda5c3aad9d95414dc4fbc11c3  results/multiqc_summary/multiqc_data/multiqc_cutadapt_1.txt
 0c6363588cf6ff74d49f27c164185918  results/multiqc_summary/multiqc_data/multiqc_star.txt
 dd81441ca97912a62292d317af2c107c  results/multiqc_summary/multiqc_data/multiqc_kallisto.txt
 ba090b1b4a2473891de97493d3244956  results/multiqc_summary/multiqc_data/multiqc_fastqc.txt

--- a/workflow/scripts/zarp_multiqc_config.py
+++ b/workflow/scripts/zarp_multiqc_config.py
@@ -89,7 +89,7 @@ report_header_info:
  - Analysis Author: "{author_name}"
  - Contact E-mail: "{author_email}"

-top_modules:
+module_order:

  - fastqc:
      path_filters:
@@ -109,17 +109,13 @@ top_modules:
      path_filters:
      - "*/*/map_genome/*"

-  - alfa:
-      name: "ALFA"
-      anchor: "ALFA"
+  - ALFA:
      path_filters:
-      - "*/ALFA_plots.concat_mqc.png"
+      - "*/*/ALFA/*/*ALFA_feature_counts.tsv"

-  - TIN_scores:
-      name: "TIN_scores"
-      anchor: "TIN_scores"
+  - tin-score:
      path_filters:
-      - "*/TIN_scores_boxplot_mqc.png"
+      - "*/*/TIN/TIN_score.tsv"

  - salmon:
      path_filters:
@@ -143,8 +139,6 @@ fn_clean_exts:
  - '.pseudo'
  - '.salmon'
  - '.sam'
-  - 'mqc'
-  - '.png'
 ..."""

    with open(options.config, "w") as config: