From dcbb51e6aefa9f21843fb87513c10e93d03e9b4e Mon Sep 17 00:00:00 2001
From: burri0000 <dominik.burri@unibas.ch>
Date: Wed, 18 Mar 2020 13:38:03 +0100
Subject: [PATCH] included ALFA results for Unique reads. Fixes #93

---
 Snakefile                                     | 38 +++++++++++++------
 tests/test_alfa/expected_output.md5           | 36 ++++++++++++------
 tests/test_alfa/test.sh                       |  3 +-
 .../expected_output.md5                       | 24 +++++++-----
 4 files changed, 67 insertions(+), 34 deletions(-)

diff --git a/Snakefile b/Snakefile
index 97db0e3..65c9b5e 100644
--- a/Snakefile
+++ b/Snakefile
@@ -101,16 +101,26 @@ rule finish:
             "{seqmode}",
             "{sample}",
             "ALFA",
-            "ALFA_plots.Biotypes.pdf"),
+            "{unique_type}",
+            "ALFA_plots.Categories.pdf"),
             zip,
-            sample= [i for i in list(samples_table.index.values)],
+            # zip pairs the lists element-wise and stops at the shortest one,
+            # so each sample (and its seqmode) is repeated once per unique_type
+            sample= [
+                i
+                for i in list(samples_table.index.values)
+                for _ in ("Unique", "UniqueMultiple")],
             seqmode= [
                 samples_table.loc[i,"seqmode"] 
-                for i in list(samples_table.index.values)]),
-        alfa_all_samples = os.path.join(
+                for i in list(samples_table.index.values)
+                for _ in ("Unique", "UniqueMultiple")],
+            unique_type = ["Unique", "UniqueMultiple"] * len(samples_table.index)),
+        alfa_all_samples = expand(os.path.join(
             config["output_dir"],
             "ALFA",
-            "ALFA_plots.Categories.pdf")
+            "{unique_type}",
+            "ALFA_plots.Categories.pdf"),
+            unique_type = ["Unique", "UniqueMultiple"]),
 
 
 
@@ -460,13 +464,13 @@ rule rename_star_rpm_for_alfa:
             "{seqmode}",
             "{sample}",
             "STAR_coverage",
-            "{sample}_Signal.UniqueMultiple.str1.out.bg"),
+            "{sample}_Signal.{unique}.str1.out.bg"),
         str2 = os.path.join(
             config["output_dir"],
             "{seqmode}",
             "{sample}",
             "STAR_coverage",
-            "{sample}_Signal.UniqueMultiple.str2.out.bg")
+            "{sample}_Signal.{unique}.str2.out.bg")
     
     output:
         plus = os.path.join(
@@ -474,13 +478,15 @@ rule rename_star_rpm_for_alfa:
             "{seqmode}",
             "{sample}",
             "ALFA",
-            "{sample}_Signal.UniqueMultiple.out.plus.bg"),
+            "{unique}",
+            "{sample}_Signal.{unique}.out.plus.bg"),
         minus = os.path.join(
             config["output_dir"],
             "{seqmode}",
             "{sample}",
             "ALFA",
-            "{sample}_Signal.UniqueMultiple.out.minus.bg")
+            "{unique}",
+            "{sample}_Signal.{unique}.out.minus.bg")
     
     params:
         orientation = lambda wildcards: samples_table.loc[wildcards.sample, "kallisto_directionality"]
@@ -825,13 +831,15 @@ rule alfa_qc:
             "{seqmode}",
             "{sample}",
             "ALFA",
-            "{sample}_Signal.UniqueMultiple.out.plus.bg"),
+            "{unique}",
+            "{sample}_Signal.{unique}.out.plus.bg"),
         minus = os.path.join(
             config["output_dir"],
             "{seqmode}",
             "{sample}",
             "ALFA",
-            "{sample}_Signal.UniqueMultiple.out.minus.bg"),
+            "{unique}",
+            "{sample}_Signal.{unique}.out.minus.bg"),
         gtf = lambda wildcards: os.path.join(config["alfa_indexes"], 
             samples_table.loc[wildcards.sample, "organism"], 
             str(samples_table.loc[wildcards.sample, "index_size"]), 
@@ -844,18 +852,21 @@ rule alfa_qc:
             "{seqmode}",
             "{sample}",
             "ALFA",
+            "{unique}",
             "ALFA_plots.Biotypes.pdf"),
         categories = os.path.join(
             config["output_dir"],
             "{seqmode}",
             "{sample}",
             "ALFA",
+            "{unique}",
             "ALFA_plots.Categories.pdf"),
         table = os.path.join(
             config["output_dir"],
             "{seqmode}",
             "{sample}",
             "ALFA",
+            "{unique}",
             "{sample}.ALFA_feature_counts.tsv")
 
     params:
@@ -874,7 +885,7 @@ rule alfa_qc:
             config["log_dir"], 
             "{seqmode}", 
             "{sample}", 
-            "alfa_qc.log"))
+            "alfa_qc.{unique}.log"))
 
     shell:
         """ 
@@ -893,6 +904,7 @@ rule alfa_qc_all_samples:
             samples_table.loc[sample1, "seqmode"],
             str(sample1),
             "ALFA",
+            "{unique}",
             sample1 + ".ALFA_feature_counts.tsv")
             for sample1 in list(samples_table.index.values)]
 
@@ -900,10 +912,12 @@ rule alfa_qc_all_samples:
         biotypes = os.path.join(
             config["output_dir"],
             "ALFA",
+            "{unique}",
             "ALFA_plots.Biotypes.pdf"),
         categories = os.path.join(
             config["output_dir"],
             "ALFA",
+            "{unique}",
             "ALFA_plots.Categories.pdf")
 
     params:
@@ -912,7 +926,7 @@ rule alfa_qc_all_samples:
     log: 
         os.path.abspath(
             os.path.join(config["log_dir"], 
-            "alfa_qc_all_samples.log"))
+            "alfa_qc_all_samples.{unique}.log"))
 
     singularity:
         "docker://zavolab/alfa:1.1.1"
diff --git a/tests/test_alfa/expected_output.md5 b/tests/test_alfa/expected_output.md5
index 57f9dfd..7b2472f 100644
--- a/tests/test_alfa/expected_output.md5
+++ b/tests/test_alfa/expected_output.md5
@@ -1,5 +1,29 @@
 90e42aa46890e9cd0a47800428699fbf  results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.stranded.ALFA_index
 ccc3cf5a57fddb0d469e597d4376b1bf  results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.unstranded.ALFA_index
+e5959524a2daf35da9249fb313920315  results/paired_end/paired_end_R1_on_minus_antisense/ALFA/Unique/paired_end_R1_on_minus_antisense.ALFA_feature_counts.tsv
+c406c1800d3690dd774aaa7e3c190523  results/paired_end/paired_end_R1_on_minus_sense/ALFA/Unique/paired_end_R1_on_minus_sense.ALFA_feature_counts.tsv
+c406c1800d3690dd774aaa7e3c190523  results/paired_end/paired_end_R1_on_plus_antisense/ALFA/Unique/paired_end_R1_on_plus_antisense.ALFA_feature_counts.tsv
+e5959524a2daf35da9249fb313920315  results/paired_end/paired_end_R1_on_plus_sense/ALFA/Unique/paired_end_R1_on_plus_sense.ALFA_feature_counts.tsv
+e5959524a2daf35da9249fb313920315  results/paired_end/paired_end_R1_on_minus_antisense/ALFA/UniqueMultiple/paired_end_R1_on_minus_antisense.ALFA_feature_counts.tsv
+c406c1800d3690dd774aaa7e3c190523  results/paired_end/paired_end_R1_on_minus_sense/ALFA/UniqueMultiple/paired_end_R1_on_minus_sense.ALFA_feature_counts.tsv
+c406c1800d3690dd774aaa7e3c190523  results/paired_end/paired_end_R1_on_plus_antisense/ALFA/UniqueMultiple/paired_end_R1_on_plus_antisense.ALFA_feature_counts.tsv
+e5959524a2daf35da9249fb313920315  results/paired_end/paired_end_R1_on_plus_sense/ALFA/UniqueMultiple/paired_end_R1_on_plus_sense.ALFA_feature_counts.tsv
+5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_minus_antisense/ALFA/UniqueMultiple/paired_end_R1_on_minus_antisense_Signal.UniqueMultiple.out.minus.bg
+8e23d52d7f635d927e292174f33168eb  results/paired_end/paired_end_R1_on_minus_antisense/ALFA/UniqueMultiple/paired_end_R1_on_minus_antisense_Signal.UniqueMultiple.out.plus.bg
+8e23d52d7f635d927e292174f33168eb  results/paired_end/paired_end_R1_on_minus_sense/ALFA/UniqueMultiple/paired_end_R1_on_minus_sense_Signal.UniqueMultiple.out.minus.bg
+5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_minus_sense/ALFA/UniqueMultiple/paired_end_R1_on_minus_sense_Signal.UniqueMultiple.out.plus.bg
+8e23d52d7f635d927e292174f33168eb  results/paired_end/paired_end_R1_on_plus_antisense/ALFA/UniqueMultiple/paired_end_R1_on_plus_antisense_Signal.UniqueMultiple.out.minus.bg
+5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_plus_antisense/ALFA/UniqueMultiple/paired_end_R1_on_plus_antisense_Signal.UniqueMultiple.out.plus.bg
+5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_plus_sense/ALFA/UniqueMultiple/paired_end_R1_on_plus_sense_Signal.UniqueMultiple.out.minus.bg
+8e23d52d7f635d927e292174f33168eb  results/paired_end/paired_end_R1_on_plus_sense/ALFA/UniqueMultiple/paired_end_R1_on_plus_sense_Signal.UniqueMultiple.out.plus.bg
+5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_minus_antisense/ALFA/Unique/paired_end_R1_on_minus_antisense_Signal.Unique.out.minus.bg
+8e23d52d7f635d927e292174f33168eb  results/paired_end/paired_end_R1_on_minus_antisense/ALFA/Unique/paired_end_R1_on_minus_antisense_Signal.Unique.out.plus.bg
+8e23d52d7f635d927e292174f33168eb  results/paired_end/paired_end_R1_on_minus_sense/ALFA/Unique/paired_end_R1_on_minus_sense_Signal.Unique.out.minus.bg
+5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_minus_sense/ALFA/Unique/paired_end_R1_on_minus_sense_Signal.Unique.out.plus.bg
+8e23d52d7f635d927e292174f33168eb  results/paired_end/paired_end_R1_on_plus_antisense/ALFA/Unique/paired_end_R1_on_plus_antisense_Signal.Unique.out.minus.bg
+5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_plus_antisense/ALFA/Unique/paired_end_R1_on_plus_antisense_Signal.Unique.out.plus.bg
+5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_plus_sense/ALFA/Unique/paired_end_R1_on_plus_sense_Signal.Unique.out.minus.bg
+8e23d52d7f635d927e292174f33168eb  results/paired_end/paired_end_R1_on_plus_sense/ALFA/Unique/paired_end_R1_on_plus_sense_Signal.Unique.out.plus.bg
 5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_minus_antisense/STAR_coverage/paired_end_R1_on_minus_antisense_Signal.UniqueMultiple.str1.out.bg
 8e23d52d7f635d927e292174f33168eb  results/paired_end/paired_end_R1_on_minus_antisense/STAR_coverage/paired_end_R1_on_minus_antisense_Signal.UniqueMultiple.str2.out.bg
 5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_minus_antisense/STAR_coverage/paired_end_R1_on_minus_antisense_Signal.Unique.str1.out.bg
@@ -16,15 +40,3 @@ ccc3cf5a57fddb0d469e597d4376b1bf  results/alfa_indexes/homo_sapiens/75/ALFA/sort
 5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_plus_sense/STAR_coverage/paired_end_R1_on_plus_sense_Signal.UniqueMultiple.str2.out.bg
 8e23d52d7f635d927e292174f33168eb  results/paired_end/paired_end_R1_on_plus_sense/STAR_coverage/paired_end_R1_on_plus_sense_Signal.Unique.str1.out.bg
 5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_plus_sense/STAR_coverage/paired_end_R1_on_plus_sense_Signal.Unique.str2.out.bg
-5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_minus_antisense/ALFA/paired_end_R1_on_minus_antisense_Signal.UniqueMultiple.out.minus.bg
-8e23d52d7f635d927e292174f33168eb  results/paired_end/paired_end_R1_on_minus_antisense/ALFA/paired_end_R1_on_minus_antisense_Signal.UniqueMultiple.out.plus.bg
-8e23d52d7f635d927e292174f33168eb  results/paired_end/paired_end_R1_on_minus_sense/ALFA/paired_end_R1_on_minus_sense_Signal.UniqueMultiple.out.minus.bg
-5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_minus_sense/ALFA/paired_end_R1_on_minus_sense_Signal.UniqueMultiple.out.plus.bg
-8e23d52d7f635d927e292174f33168eb  results/paired_end/paired_end_R1_on_plus_antisense/ALFA/paired_end_R1_on_plus_antisense_Signal.UniqueMultiple.out.minus.bg
-5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_plus_antisense/ALFA/paired_end_R1_on_plus_antisense_Signal.UniqueMultiple.out.plus.bg
-5e90c760710980f4f4866dbe9aa32c6c  results/paired_end/paired_end_R1_on_plus_sense/ALFA/paired_end_R1_on_plus_sense_Signal.UniqueMultiple.out.minus.bg
-8e23d52d7f635d927e292174f33168eb  results/paired_end/paired_end_R1_on_plus_sense/ALFA/paired_end_R1_on_plus_sense_Signal.UniqueMultiple.out.plus.bg
-e5959524a2daf35da9249fb313920315  results/paired_end/paired_end_R1_on_minus_antisense/ALFA/paired_end_R1_on_minus_antisense.ALFA_feature_counts.tsv
-c406c1800d3690dd774aaa7e3c190523  results/paired_end/paired_end_R1_on_minus_sense/ALFA/paired_end_R1_on_minus_sense.ALFA_feature_counts.tsv
-c406c1800d3690dd774aaa7e3c190523  results/paired_end/paired_end_R1_on_plus_antisense/ALFA/paired_end_R1_on_plus_antisense.ALFA_feature_counts.tsv
-e5959524a2daf35da9249fb313920315  results/paired_end/paired_end_R1_on_plus_sense/ALFA/paired_end_R1_on_plus_sense.ALFA_feature_counts.tsv
diff --git a/tests/test_alfa/test.sh b/tests/test_alfa/test.sh
index 280749c..3d5c325 100755
--- a/tests/test_alfa/test.sh
+++ b/tests/test_alfa/test.sh
@@ -33,7 +33,8 @@ snakemake \
     --use-singularity \
     --singularity-args="--bind ${PWD}/../input_files" \
     --verbose \
-    results/ALFA/ALFA_plots.Categories.pdf
+    results/ALFA/UniqueMultiple/ALFA_plots.Categories.pdf \
+    results/ALFA/Unique/ALFA_plots.Categories.pdf
 
 # Check md5 sum of some output files
 find results/ -type f -name \*\.gz -exec gunzip '{}' \;
diff --git a/tests/test_integration_workflow/expected_output.md5 b/tests/test_integration_workflow/expected_output.md5
index c017ef0..e14eb7e 100644
--- a/tests/test_integration_workflow/expected_output.md5
+++ b/tests/test_integration_workflow/expected_output.md5
@@ -78,6 +78,21 @@ e72f5d798c99272f8c0166dc77247db1  results/single_end/synthetic_10_reads_mate_1_s
 92bcd0592d22a6a58d0360fc76103e56  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/aux_info/observed_bias
 92bcd0592d22a6a58d0360fc76103e56  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/aux_info/observed_bias_3p
 d41d8cd98f00b204e9800998ecf8427e  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/aux_info/unmapped_names.txt
+3ce47cb1d62482c5d62337751d7e8552  results/transcriptome/homo_sapiens/transcriptome.fa
+6b44c507f0a1c9f7369db0bb1deef0fd  results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.stranded.ALFA_index
+2caebc23faf78fdbbbdbb118d28bd6b5  results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.unstranded.ALFA_index
+c1254a0bae19ac3ffc39f73099ffcf2b  results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv
+c1254a0bae19ac3ffc39f73099ffcf2b  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/Unique/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.ALFA_feature_counts.tsv
+c266d31e0a2ad84975cb9de335891e64  results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.out.minus.bg
+0139e75ddbfe6eb081c2c2d9b9108ab4  results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.out.plus.bg
+bcccf679a8c083d01527514c9f5680a0  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/Unique/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.Unique.out.minus.bg
+ea91b4f85622561158bff2f7c9c312b3  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/Unique/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.Unique.out.plus.bg
+c1254a0bae19ac3ffc39f73099ffcf2b  results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/UniqueMultiple/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv
+c1254a0bae19ac3ffc39f73099ffcf2b  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/UniqueMultiple/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.ALFA_feature_counts.tsv
+c266d31e0a2ad84975cb9de335891e64  results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/UniqueMultiple/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.minus.bg
+0139e75ddbfe6eb081c2c2d9b9108ab4  results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/UniqueMultiple/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.plus.bg
+bcccf679a8c083d01527514c9f5680a0  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/UniqueMultiple/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.out.minus.bg
+ea91b4f85622561158bff2f7c9c312b3  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/UniqueMultiple/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.out.plus.bg
 0139e75ddbfe6eb081c2c2d9b9108ab4  results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str1.out.bg
 c266d31e0a2ad84975cb9de335891e64  results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str2.out.bg
 0139e75ddbfe6eb081c2c2d9b9108ab4  results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str1.out.bg
@@ -86,12 +101,3 @@ ea91b4f85622561158bff2f7c9c312b3  results/single_end/synthetic_10_reads_mate_1_s
 bcccf679a8c083d01527514c9f5680a0  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.str2.out.bg
 ea91b4f85622561158bff2f7c9c312b3  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.Unique.str1.out.bg
 bcccf679a8c083d01527514c9f5680a0  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.Unique.str2.out.bg
-3ce47cb1d62482c5d62337751d7e8552  results/transcriptome/homo_sapiens/transcriptome.fa
-6b44c507f0a1c9f7369db0bb1deef0fd  results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.stranded.ALFA_index
-2caebc23faf78fdbbbdbb118d28bd6b5  results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.unstranded.ALFA_index
-c1254a0bae19ac3ffc39f73099ffcf2b  results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv
-c266d31e0a2ad84975cb9de335891e64  results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.minus.bg
-0139e75ddbfe6eb081c2c2d9b9108ab4  results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.plus.bg
-c1254a0bae19ac3ffc39f73099ffcf2b  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.ALFA_feature_counts.tsv
-bcccf679a8c083d01527514c9f5680a0  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.out.minus.bg
-ea91b4f85622561158bff2f7c9c312b3  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.out.plus.bg
\ No newline at end of file
-- 
GitLab